Compare commits

..

74 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
87486f87ef revert loading compressed dictionaries 2022-07-12 14:20:01 +02:00
Maarten L. Hekkelman
80e7da0f13 Locating dictionaries updated 2022-07-12 14:13:59 +02:00
Maarten L. Hekkelman
3745beae66 Fix search order for resources 2022-07-12 13:55:19 +02:00
Maarten L. Hekkelman
3965840bfa New way of locating resources 2022-07-12 13:49:02 +02:00
Maarten L. Hekkelman
a88c6f3d32 Fix for older clang (on MacOS?). 2022-07-05 11:07:12 +02:00
Maarten L. Hekkelman
ed6c6f0026 Move assignment of Structure is not possible due to reference to datablock 2022-07-05 09:46:59 +02:00
Maarten L. Hekkelman
bdda9d72b5 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-07-05 09:36:36 +02:00
Maarten L. Hekkelman
fd080e778e Changes for MacOS/MSVC 2022-07-05 09:36:26 +02:00
Maarten L. Hekkelman
9f72df2ecd Update LICENSE
Format changed, now recognized
2022-07-01 08:21:22 +02:00
ojcharles
617db012f0 Update Cif++.cpp (#15)
add mutex library for std::unique_lock, not included in std lib
2022-07-01 08:07:24 +02:00
Maarten L. Hekkelman
9d15541237 include cstring for gnu c++ 12 2022-06-22 08:53:05 +02:00
Maarten L. Hekkelman
35c99564c6 Fix importing sugars from PDB files 2022-06-01 15:17:54 +02:00
Maarten L. Hekkelman
1d8fe334d6 Fix writing sugar branches 2022-06-01 13:22:34 +02:00
Maarten L. Hekkelman
d86bb314ac Better handling of missing residues/mismatch seqres 2022-06-01 11:27:41 +02:00
Maarten L. Hekkelman
0ef8eb59f8 Fix scattering factors error 2022-05-18 13:04:42 +02:00
Maarten L. Hekkelman
b5fe4a9a87 locating resources that might be protected 2022-05-18 11:53:13 +02:00
Maarten L. Hekkelman
11fea31b98 more loading resources 2022-05-18 11:37:26 +02:00
Maarten L. Hekkelman
f629275ed5 locating resources that might be protected 2022-05-18 11:25:47 +02:00
Maarten L. Hekkelman
a5f6166469 locating resources that might be protected 2022-05-18 11:14:14 +02:00
Maarten L. Hekkelman
501050e591 Add move constructor to mmcif::Structure 2022-05-10 17:11:04 +02:00
Maarten L. Hekkelman
e1b240b2b2 sugar work 2022-05-04 16:48:28 +02:00
Maarten L. Hekkelman
3d79278ed7 Merge branch 'trunk' into develop 2022-05-04 09:51:15 +02:00
Maarten L. Hekkelman
5e0b197a43 mmcif::Atom::compound() revision 2022-05-04 09:50:24 +02:00
Maarten L. Hekkelman
9c4170d9e2 - Added more const members
- change PDB writing interface
2022-05-03 11:52:15 +02:00
Maarten L. Hekkelman
af721eb196 Make having no compound less fatal 2022-05-02 14:40:22 +02:00
Maarten L. Hekkelman
788e315f5e Fix entity_branch_link entry 2022-05-02 12:24:35 +02:00
Maarten L. Hekkelman
4a82a8d5a8 Fixed all tests 2022-05-02 11:09:36 +02:00
Maarten L. Hekkelman
11019a26f8 Merge branch 'sugar-tests' into develop 2022-05-02 10:03:44 +02:00
Maarten L. Hekkelman
6f8909dce9 Fixed tests 2022-05-02 10:01:10 +02:00
Maarten L. Hekkelman
5525103aaf backup 2022-05-02 09:26:59 +02:00
Maarten L. Hekkelman
291ef737b1 - Fix removing atoms
- Optimize isUnquotedString
2022-05-01 14:09:06 +02:00
Maarten L. Hekkelman
af125bdd57 backup 2022-04-26 16:04:13 +02:00
Maarten L. Hekkelman
79089bbb8c removed incorrect assert 2022-04-20 16:32:47 +02:00
Maarten L. Hekkelman
1f08498d00 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-04-20 11:18:50 +02:00
Maarten L. Hekkelman
49ba714a03 - structure id stuff
- added cif::null
- more tests
2022-04-20 11:17:11 +02:00
Maarten L. Hekkelman
85fd9296b2 Add test for loading 2022-04-19 17:03:09 +02:00
Maarten L. Hekkelman
1cda14867f More interface changes in mmcif::Structure 2022-04-19 13:40:38 +02:00
Maarten L. Hekkelman
2d2b26f7dc Fix regression in bondmap calculation 2022-04-19 09:10:54 +02:00
Maarten L. Hekkelman
93b33af44a oops, wrong field name 2022-04-13 10:57:50 +02:00
Maarten L. Hekkelman
eb80490bcd getPolymerByAsymID 2022-04-13 09:47:18 +02:00
Maarten L. Hekkelman
ba2b06f5af reduce complexity 2022-04-13 09:39:43 +02:00
Maarten L. Hekkelman
fecc762db1 - better link validation
- better output (quote reserved strings)
2022-04-12 17:00:47 +02:00
Maarten L. Hekkelman
1e406253ab loading unknown atoms 2022-04-12 12:41:25 +02:00
Maarten L. Hekkelman
6e3b85f43d getResidue, again 2022-04-11 16:36:40 +02:00
Maarten L. Hekkelman
58f1b626e2 change getResidue 2022-04-06 12:49:03 +02:00
Maarten L. Hekkelman
c104a08e16 fixed Atom::charge to pick more sensible default 2022-03-30 11:14:11 +02:00
Maarten L. Hekkelman
dd0f6ca1e6 accept more invalid characters, sigh 2022-03-29 11:45:33 +02:00
Maarten L. Hekkelman
f02ea91b51 label and auth seq id, some improvements 2022-03-28 09:50:37 +02:00
Maarten L. Hekkelman
6768a501a3 access to atoms 2022-03-21 09:58:16 +01:00
Maarten L. Hekkelman
879e15c759 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-03-14 16:28:55 +01:00
Maarten L. Hekkelman
89285b4abc construct quaternion from angle/axis 2022-03-14 16:28:41 +01:00
Maarten L. Hekkelman
c584714f91 ion radii 2022-03-09 15:40:32 +01:00
Maarten L. Hekkelman
f5016403b7 refactored mmcif::File 2022-03-02 15:26:29 +01:00
Maarten L. Hekkelman
c8f66ae6bb start remove residue 2022-02-23 08:24:26 +01:00
Maarten L. Hekkelman
858c967e71 Locate mmcif dictionary in CCP4 space 2022-02-15 08:08:01 +01:00
Maarten L. Hekkelman
f9ca5de5bf Add missing include for gcc 8.2 2022-02-09 16:04:24 +01:00
Maarten L. Hekkelman
252c3476a1 Slightly better handling of hetero residues 2022-02-09 14:53:05 +01:00
Maarten L. Hekkelman
19210df6db Fix parsing mmCIF files with an unquoted string ?? 2022-02-08 11:22:10 +01:00
Maarten L. Hekkelman
15c5730749 Remove redundant FindFilesystem include 2022-02-03 10:35:34 +01:00
Maarten L. Hekkelman
3764adb7ef update changelog 2022-02-02 13:44:32 +01:00
Maarten L. Hekkelman
9160adb1cf Merge branch 'develop' into trunk 2022-02-02 13:40:47 +01:00
Maarten L. Hekkelman
3ebf4338ab Do not crash on uninitialized Atoms 2022-02-02 12:41:32 +01:00
Maarten L. Hekkelman
2eb4b7b39b Fix building in Windows 2022-01-25 15:27:15 +01:00
Maarten L. Hekkelman
c241e49b48 fix makefile 2022-01-25 15:13:15 +01:00
Maarten L. Hekkelman
238c881132 Update dependencies, version string 2022-01-25 13:27:58 +01:00
Maarten L. Hekkelman
49dc733536 Create non poly from described atoms 2022-01-25 13:27:19 +01:00
Maarten L. Hekkelman
755bd78f60 Fix declaration for mmcif::Nudge 2022-01-19 13:21:29 +01:00
Maarten L. Hekkelman
77f80cd51f Fix atomic test (apparently, libatomic is only needed for std::atomic<long long>) 2022-01-19 08:25:25 +01:00
Maarten L. Hekkelman
3df6000635 cleaning up code 2022-01-18 16:06:28 +01:00
Maarten L. Hekkelman
5efee2b40d comment adjusted 2022-01-18 13:28:42 +01:00
Maarten L. Hekkelman
f3c2e59184 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-01-18 11:26:23 +01:00
Maarten L. Hekkelman
24ab660e6e Change logic for testing std::filesystem and libatomic 2022-01-18 11:24:31 +01:00
Maarten L. Hekkelman
6c0a418068 Revert "Check atomic"
This reverts commit 07a180991e.
2022-01-18 11:12:56 +01:00
Maarten L. Hekkelman
07a180991e Check atomic 2022-01-17 11:40:46 +01:00
34 changed files with 3914 additions and 1811 deletions

4
.gitignore vendored
View File

@@ -11,4 +11,6 @@ data/components.cif*
CMakeSettings.json
msvc/
Testing/
rsrc/feature-request.txt
test/1cbs.cif
test/test-create_sugar_?.cif

View File

@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16)
# set the project name
project(cifpp VERSION 3.0.2 LANGUAGES CXX)
project(cifpp VERSION 4.2.0 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@@ -37,17 +37,13 @@ include(CheckIncludeFiles)
include(CheckLibraryExists)
include(CMakePackageConfigHelpers)
include(Dart)
include(FindFilesystem)
include(GenerateExportHeader)
set(CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(Filesystem REQUIRED)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# https://stackoverflow.com/questions/63902528/program-crashes-when-filesystempath-is-destroyed
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
elseif(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
@@ -74,6 +70,8 @@ if(BUILD_FOR_CCP4)
set(BUILD_SHARED_LIBS ON)
endif()
endif("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
add_definitions(-DCCP4=1)
endif()
# Check if CCP4 is available
@@ -122,20 +120,6 @@ if(MSVC)
message(STATUS "The library and auxiliary files will be installed in $ENV{LOCALAPPDATA}/${PROJECT_NAME}")
set(CMAKE_INSTALL_PREFIX "$ENV{LOCALAPPDATA}/${PROJECT_NAME}" CACHE PATH "..." FORCE)
endif()
# Find out the processor type for the target
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
set(COFF_TYPE "x64")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
set(COFF_TYPE "x86")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ARM64")
set(COFF_TYPE "arm64")
else()
message(FATAL_ERROR "Unsupported or unknown processor type ${CMAKE_SYSTEM_PROCESSOR}")
endif()
# for mrc, just in case
list(APPEND CMAKE_PREFIX_PATH "$ENV{LOCALAPPDATA}/mrc")
endif()
if(UNIX AND NOT APPLE AND NOT BUILD_FOR_CCP4 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
@@ -175,24 +159,18 @@ find_package(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_opt
if(NOT MSVC AND Boost_USE_STATIC_LIBS)
find_package(ZLIB REQUIRED)
list(APPEND CIFPP_REQUIRED_LIBRARIES ZLIB::ZLIB)
endif()
include(FindFilesystem)
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPFS_LIBRARY})
include(FindAtomic)
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY})
# Create a revision file, containing the current git version info
include(GetGitRevisionDescription)
option(GENERATE_CUSTOM_VERSION "Generate a custom version string" OFF)
if(GIT-NOTFOUND OR HEAD-HASH-NOTFOUND OR NOT GENERATE_CUSTOM_VERSION)
get_git_head_revision(REFSPEC COMMITHASH)
# Generate our own version string
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
else()
SET(BUILD_VERSION_STRING ${PROJECT_VERSION})
endif()
# generate version.h
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
configure_file("${CMAKE_SOURCE_DIR}/src/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
include(VersionString)
write_version_header("LibCIFPP")
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
@@ -200,7 +178,7 @@ if(CIFPP_RECREATE_SYMOP_DATA)
add_executable(symop-map-generator "${CMAKE_SOURCE_DIR}/tools/symop-map-generator.cpp")
target_link_libraries(symop-map-generator Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES})
target_link_libraries(symop-map-generator Threads::Threads ${Boost_LIBRARIES} ${CIFPP_REQUIRED_LIBRARIES})
if(Boost_INCLUDE_DIR)
target_include_directories(symop-map-generator PUBLIC ${Boost_INCLUDE_DIR})
endif()
@@ -271,7 +249,8 @@ target_include_directories(cifpp
${CMAKE_BINARY_DIR}
)
target_link_libraries(cifpp Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES})
target_link_libraries(cifpp PUBLIC Threads::Threads Boost::regex Boost::iostreams ${CIFPP_REQUIRED_LIBRARIES})
# target_link_libraries(cifpp PRIVATE)
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
@@ -331,7 +310,7 @@ install(TARGETS cifpp
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(MSVC)
if(MSVC AND BUILD_SHARED_LIBS)
install(
FILES $<TARGET_PDB_FILE:${PROJECT_NAME}>
DESTINATION ${CMAKE_INSTALL_LIBDIR}
@@ -415,6 +394,7 @@ if(CIFPP_BUILD_TESTS)
# pdb2cif
rename-compound
structure
sugar
unit)
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
@@ -428,7 +408,7 @@ if(CIFPP_BUILD_TESTS)
${CMAKE_CURRENT_BINARY_DIR} # for config.h
)
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES})
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp )
if(CIFPP_USE_RSRC)
mrc_target_resources(${CIFPP_TEST} ${CMAKE_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic)

View File

@@ -1,7 +1,8 @@
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
find_dependency(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_options)
find_dependency(Threads)
find_dependency(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex)
if(NOT WIN32)
find_dependency(ZLIB)
endif()

View File

@@ -1,6 +1,7 @@
SPDX-License-Identifier: BSD-2-Clause
BSD-2-Clause License
Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -20,4 +21,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -1,3 +1,35 @@
Version 4.2.0
- Yet another rewrite of resource loading
Version 4.1.1
- Fall back to zero charge for scattering factors if the atom
was not found in the table.
- Improve code to locate resources, failing less.
Version 4.1.0
- Some interface changes for mmcif::Atom
Version 4.0.1
- Added a bunch of const methods to Datablock and Category.
- Changed PDB writing interface to accept Datablock instead of File.
Version 4.0.0
- getResidue in mmcif::Structure now requires both a
sequence ID and an auth sequence ID. As a result the code was cleaned
up considerably.
Version 3.0.5
- mmcif::Structure redesign. It is now a wrapper around a cif::Datablock.
Version 3.0.4
- Fix in mmCIF parser, now correctly handles the unquoted
string ??
Version 3.0.3
- Better configuration checks, for atomic e.g.
- Fixed a problem introduced in refactoring mmcif::Atom
- Version string creation
Version 3.0.2
- refactored mmcif::Atom for performance reasons
@@ -12,6 +44,9 @@ Version 3.0.0
- Upgraded mmcif::Structure
- various other small fixes
Version 2.0.5
- Backporting updated CMakeLists.txt file
Version 2.0.4
- Reverted a too strict test when reading cif files.

64
cmake/FindAtomic.cmake Normal file
View File

@@ -0,0 +1,64 @@
# Simple check to see if we need a library for std::atomic
if(TARGET std::atomic)
return()
endif()
cmake_minimum_required(VERSION 3.10)
include(CMakePushCheckState)
include(CheckIncludeFileCXX)
include(CheckCXXSourceRuns)
cmake_push_check_state()
set(CMAKE_CXX_STANDARD 17)
check_include_file_cxx("atomic" _CXX_ATOMIC_HAVE_HEADER)
mark_as_advanced(_CXX_ATOMIC_HAVE_HEADER)
set(code [[
#include <atomic>
int main(int argc, char** argv) {
std::atomic<long long> s;
++s;
return 0;
}
]])
check_cxx_source_runs("${code}" _CXX_ATOMIC_BUILTIN)
if(_CXX_ATOMIC_BUILTIN)
set(_found 1)
else()
list(APPEND CMAKE_REQUIRED_LIBRARIES atomic)
list(APPEND FOLLY_LINK_LIBRARIES atomic)
check_cxx_source_runs("${code}" _CXX_ATOMIC_LIB_NEEDED)
if (NOT _CXX_ATOMIC_LIB_NEEDED)
message(FATAL_ERROR "unable to link C++ std::atomic code: you may need \
to install GNU libatomic")
else()
set(_found 1)
endif()
endif()
if(_found)
add_library(std::atomic INTERFACE IMPORTED)
set_property(TARGET std::atomic APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_14)
if(_CXX_ATOMIC_BUILTIN)
# Nothing to add...
elseif(_CXX_ATOMIC_LIB_NEEDED)
set_target_properties(std::atomic PROPERTIES IMPORTED_LIBNAME atomic)
set(STDCPPATOMIC_LIBRARY atomic)
endif()
endif()
cmake_pop_check_state()
set(Atomic_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::atomic" FORCE)
if(Atomic_FIND_REQUIRED AND NOT Atomic_FOUND)
message(FATAL_ERROR "Cannot run simple program using std::atomic")
endif()

View File

@@ -59,8 +59,10 @@ if(_found)
# Nothing to add...
elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED)
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME stdc++fs)
set(STDCPPFS_LIBRARY stdc++fs)
elseif(CXX_FILESYSTEM_CPPFS_NEEDED)
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME c++fs)
set(STDCPPFS_LIBRARY c++fs)
endif()
endif()

77
cmake/VersionString.cmake Normal file
View File

@@ -0,0 +1,77 @@
# SPDX-License-Identifier: BSD-2-Clause
# Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.15)
# Create a revision file, containing the current git version info, if any
function(write_version_header)
include(GetGitRevisionDescription)
if(NOT(GIT-NOTFOUND OR HEAD-HASH-NOTFOUND))
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
if(BUILD_VERSION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
set(BUILD_GIT_TAGREF "${CMAKE_MATCH_2}")
if(CMAKE_MATCH_3)
set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}*")
else()
set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}")
endif()
endif()
else()
set(BUILD_VERSION_STRING "no git info available")
endif()
include_directories(${CMAKE_BINARY_DIR} PRIVATE)
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
if(ARGC GREATER 0)
set(VAR_PREFIX "${ARGV0}")
endif()
file(WRITE "${CMAKE_BINARY_DIR}/revision.hpp.in" [[// Generated revision file
#pragma once
#include <ostream>
const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
const char k@VAR_PREFIX@VersionGitTag[] = "@BUILD_GIT_TAGREF@";
const char k@VAR_PREFIX@BuildInfo[] = "@BUILD_VERSION_STRING@";
const char k@VAR_PREFIX@BuildDate[] = "@BUILD_DATE_TIME@";
inline void write_version_string(std::ostream &os, bool verbose)
{
os << k@VAR_PREFIX@ProjectName << " version " << k@VAR_PREFIX@VersionNumber << std::endl;
if (verbose)
{
os << "build: " << k@VAR_PREFIX@BuildInfo << ' ' << k@VAR_PREFIX@BuildDate << std::endl;
if (k@VAR_PREFIX@VersionGitTag[0] != 0)
os << "git tag: " << k@VAR_PREFIX@VersionGitTag << std::endl;
}
}
]])
configure_file("${CMAKE_BINARY_DIR}/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
endfunction()

View File

@@ -2394,3 +2394,189 @@ VAL "Create component" 1999-07-08 RCSB
VAL "Modify descriptor" 2011-06-04 RCSB
#
data_NAG
#
_chem_comp.id NAG
_chem_comp.name 2-acetamido-2-deoxy-beta-D-glucopyranose
_chem_comp.type "D-saccharide, beta linking"
_chem_comp.pdbx_type ATOMS
_chem_comp.formula "C8 H15 N O6"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms
;N-acetyl-beta-D-glucosamine; 2-acetamido-2-deoxy-beta-D-glucose; 2-acetamido-2-deoxy-D-glucose;
2-acetamido-2-deoxy-glucose; N-ACETYL-D-GLUCOSAMINE
;
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2020-07-17
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces ?
_chem_comp.formula_weight 221.208
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code NAG
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details Corina
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 8PCH
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
# #
loop_
_pdbx_chem_comp_synonyms.ordinal
_pdbx_chem_comp_synonyms.comp_id
_pdbx_chem_comp_synonyms.name
_pdbx_chem_comp_synonyms.provenance
_pdbx_chem_comp_synonyms.type
1 NAG N-acetyl-beta-D-glucosamine PDB ?
2 NAG 2-acetamido-2-deoxy-beta-D-glucose PDB ?
3 NAG 2-acetamido-2-deoxy-D-glucose PDB ?
4 NAG 2-acetamido-2-deoxy-glucose PDB ?
5 NAG N-ACETYL-D-GLUCOSAMINE PDB ?
# #
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
NAG C1 C1 C 0 1 N N R 7.396 28.163 26.662 0.185 1.082 -0.421 C1 NAG 1
NAG C2 C2 C 0 1 N N R 6.973 29.233 27.644 0.790 -0.220 0.112 C2 NAG 2
NAG C3 C3 C 0 1 N N R 7.667 29.055 29.000 -0.124 -1.390 -0.265 C3 NAG 3
NAG C4 C4 C 0 1 N N S 7.573 27.588 29.490 -1.526 -1.129 0.294 C4 NAG 4
NAG C5 C5 C 0 1 N N R 7.902 26.592 28.373 -2.042 0.207 -0.246 C5 NAG 5
NAG C6 C6 C 0 1 N N N 7.599 25.173 28.797 -3.417 0.504 0.355 C6 NAG 6
NAG C7 C7 C 0 1 N N N 6.291 31.299 26.595 3.197 0.157 0.076 C7 NAG 7
NAG C8 C8 C 0 1 N N N 6.684 32.649 26.036 4.559 -0.052 -0.533 C8 NAG 8
NAG N2 N2 N 0 1 N N N 7.268 30.545 27.089 2.114 -0.422 -0.480 N2 NAG 9
NAG O1 O1 O 0 1 N Y N 6.676 28.363 25.419 1.003 2.185 -0.024 O1 NAG 10
NAG O3 O3 O 0 1 N N N 7.038 29.909 29.947 0.395 -2.600 0.291 O3 NAG 11
NAG O4 O4 O 0 1 N N N 8.494 27.358 30.574 -2.405 -2.180 -0.114 O4 NAG 12
NAG O5 O5 O 0 1 N N N 7.104 26.875 27.206 -1.130 1.248 0.113 O5 NAG 13
NAG O6 O6 O 0 1 N N N 6.232 25.040 29.165 -3.949 1.691 -0.236 O6 NAG 14
NAG O7 O7 O 0 1 N N N 5.114 30.936 26.562 3.074 0.845 1.067 O7 NAG 15
NAG H1 H1 H 0 1 N N N 8.477 28.257 26.481 0.133 1.040 -1.509 H1 NAG 16
NAG H2 H2 H 0 1 N N N 5.888 29.146 27.803 0.879 -0.163 1.197 H2 NAG 17
NAG H3 H3 H 0 1 N N N 8.729 29.321 28.892 -0.174 -1.478 -1.350 H3 NAG 18
NAG H4 H4 H 0 1 N N N 6.544 27.403 29.831 -1.483 -1.091 1.382 H4 NAG 19
NAG H5 H5 H 0 1 N N N 8.971 26.674 28.128 -2.123 0.154 -1.332 H5 NAG 20
NAG H61 H61 H 0 1 N N N 7.816 24.492 27.961 -4.088 -0.333 0.157 H61 NAG 21
NAG H62 H62 H 0 1 N N N 8.232 24.910 29.657 -3.320 0.645 1.431 H62 NAG 22
NAG H81 H81 H 0 1 N N N 5.791 33.159 25.646 4.560 0.320 -1.558 H81 NAG 23
NAG H82 H82 H 0 1 N N N 7.136 33.258 26.833 5.305 0.490 0.050 H82 NAG 24
NAG H83 H83 H 0 1 N N N 7.411 32.511 25.222 4.799 -1.115 -0.532 H83 NAG 25
NAG HN2 HN2 H 0 1 N N N 8.210 30.881 27.079 2.212 -0.973 -1.273 HN2 NAG 26
NAG HO1 HO1 H 0 1 N Y N 6.933 27.696 24.793 0.679 3.044 -0.328 HO1 NAG 27
NAG HO3 HO3 H 0 1 N Y N 7.459 29.809 30.793 -0.135 -3.384 0.091 HO3 NAG 28
NAG HO4 HO4 H 0 1 N Y N 8.425 26.456 30.863 -3.312 -2.079 0.206 HO4 NAG 29
NAG HO6 HO6 H 0 1 N Y N 6.060 24.143 29.428 -4.822 1.940 0.099 HO6 NAG 30
# #
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
NAG C1 C2 SING N N 1
NAG C1 O1 SING N N 2
NAG C1 O5 SING N N 3
NAG C1 H1 SING N N 4
NAG C2 C3 SING N N 5
NAG C2 N2 SING N N 6
NAG C2 H2 SING N N 7
NAG C3 C4 SING N N 8
NAG C3 O3 SING N N 9
NAG C3 H3 SING N N 10
NAG C4 C5 SING N N 11
NAG C4 O4 SING N N 12
NAG C4 H4 SING N N 13
NAG C5 C6 SING N N 14
NAG C5 O5 SING N N 15
NAG C5 H5 SING N N 16
NAG C6 O6 SING N N 17
NAG C6 H61 SING N N 18
NAG C6 H62 SING N N 19
NAG C7 C8 SING N N 20
NAG C7 N2 SING N N 21
NAG C7 O7 DOUB N N 22
NAG C8 H81 SING N N 23
NAG C8 H82 SING N N 24
NAG C8 H83 SING N N 25
NAG N2 HN2 SING N N 26
NAG O1 HO1 SING N N 27
NAG O3 HO3 SING N N 28
NAG O4 HO4 SING N N 29
NAG O6 HO6 SING N N 30
# #
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
NAG SMILES ACDLabs 12.01 "O=C(NC1C(O)C(O)C(OC1O)CO)C"
NAG InChI InChI 1.03 "InChI=1S/C8H15NO6/c1-3(11)9-5-7(13)6(12)4(2-10)15-8(5)14/h4-8,10,12-14H,2H2,1H3,(H,9,11)/t4-,5-,6-,7-,8-/m1/s1"
NAG InChIKey InChI 1.03 OVRNDRQMDRJTHS-FMDGEEDCSA-N
NAG SMILES_CANONICAL CACTVS 3.370 "CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O"
NAG SMILES CACTVS 3.370 "CC(=O)N[CH]1[CH](O)O[CH](CO)[CH](O)[CH]1O"
NAG SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O"
NAG SMILES "OpenEye OEToolkits" 1.7.6 "CC(=O)NC1C(C(C(OC1O)CO)O)O"
# #
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
NAG "SYSTEMATIC NAME" ACDLabs 12.01 "2-(acetylamino)-2-deoxy-beta-D-glucopyranose"
NAG "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "N-[(2R,3R,4R,5S,6R)-6-(hydroxymethyl)-2,4,5-tris(oxidanyl)oxan-3-yl]ethanamide"
NAG "CONDENSED IUPAC CARBOHYDRATE SYMBOL" GMML 1.0 DGlcpNAcb
NAG "COMMON NAME" GMML 1.0 N-acetyl-b-D-glucopyranosamine
NAG "IUPAC CARBOHYDRATE SYMBOL" PDB-CARE 1.0 b-D-GlcpNAc
NAG "SNFG CARBOHYDRATE SYMBOL" GMML 1.0 GlcNAc
# #
loop_
_pdbx_chem_comp_feature.comp_id
_pdbx_chem_comp_feature.type
_pdbx_chem_comp_feature.value
_pdbx_chem_comp_feature.source
_pdbx_chem_comp_feature.support
NAG "CARBOHYDRATE ISOMER" D PDB ?
NAG "CARBOHYDRATE RING" pyranose PDB ?
NAG "CARBOHYDRATE ANOMER" beta PDB ?
NAG "CARBOHYDRATE PRIMARY CARBONYL GROUP" aldose PDB ?
# #
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
NAG "Create component" 1999-07-08 RCSB
NAG "Modify descriptor" 2011-06-04 RCSB
NAG "Modify leaving atom flag" 2011-07-01 RCSB
NAG "Modify leaving atom flag" 2012-11-26 RCSB
NAG "Other modification" 2019-08-12 RCSB
NAG "Other modification" 2019-12-19 RCSB
NAG "Other modification" 2020-07-03 RCSB
NAG "Modify name" 2020-07-17 RCSB
NAG "Modify synonyms" 2020-07-17 RCSB
##

View File

@@ -172,19 +172,26 @@ enum AtomType : uint8_t
// --------------------------------------------------------------------
// AtomTypeInfo
enum RadiusType
enum class RadiusType
{
eRadiusCalculated,
eRadiusEmpirical,
eRadiusCovalentEmpirical,
Calculated,
Empirical,
CovalentEmpirical,
eRadiusSingleBond,
eRadiusDoubleBond,
eRadiusTripleBond,
SingleBond,
DoubleBond,
TripleBond,
eRadiusVanderWaals,
VanderWaals,
eRadiusTypeCount
TypeCount
};
constexpr size_t RadiusTypeCount = static_cast<size_t>(RadiusType::TypeCount);
enum class IonicRadiusType
{
Effective, Crystal
};
struct AtomTypeInfo
@@ -194,7 +201,7 @@ struct AtomTypeInfo
std::string symbol;
float weight;
bool metal;
float radii[eRadiusTypeCount];
float radii[RadiusTypeCount];
};
extern const AtomTypeInfo kKnownAtoms[];
@@ -218,11 +225,32 @@ class AtomTypeTraits
static bool isElement(const std::string &symbol);
static bool isMetal(const std::string &symbol);
float radius(RadiusType type = eRadiusSingleBond) const
float radius(RadiusType type = RadiusType::SingleBond) const
{
if (type >= eRadiusTypeCount)
if (type >= RadiusType::TypeCount)
throw std::invalid_argument("invalid radius requested");
return mInfo->radii[type] / 100.f;
return mInfo->radii[static_cast<size_t>(type)] / 100.f;
}
/// \brief Return the radius for a charged version of this atom in a solid crystal
///
/// \param charge The charge of the ion
/// \return The radius of the ion
float crystal_ionic_radius(int charge) const;
/// \brief Return the radius for a charged version of this atom in a non-solid environment
///
/// \param charge The charge of the ion
/// \return The radius of the ion
float effective_ionic_radius(int charge) const;
/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
///
/// \param charge The charge of the ion
/// \return The radius of the ion
float ionic_radius(int charge, IonicRadiusType type = IonicRadiusType::Effective) const
{
return type == IonicRadiusType::Effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
}
// data type encapsulating the Waasmaier & Kirfel scattering factors

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -26,82 +26,82 @@
#pragma once
#include <string>
#include <array>
#include <cstring>
#include <functional>
#include <iomanip>
#include <iostream>
#include <list>
#include <optional>
#include <regex>
#include <set>
#include <sstream>
#include <iomanip>
#include <shared_mutex>
#include <sstream>
#include <string>
#include <boost/format.hpp>
#include "cif++/CifUtils.hpp"
/*
Simple C++ interface to CIF files.
Assumptions: a file contains one or more datablocks modelled by the class datablock.
Each datablock contains categories. These map to the original tables used to fill
the mmCIF file. Each Category can contain multiple Items, the columns in the table.
Values are stored as character strings internally.
Synopsis:
// create a cif file
cif::datablock e("1MVE");
e.append(cif::Category{"_entry", { "id", "1MVE" } });
cif::Category atomSite("atom_site");
size_t nr{};
for (auto& myAtom: atoms)
{
atomSite.push_back({
{ "group_PDB", "ATOM" },
{ "id", ++nr },
{ "type_symbol", myAtom.type.str() },
...
});
}
e.append(move(atomSite));
cif::File f;
f.append(e);
ofstream os("1mve.cif");
f.write(os);
Simple C++ interface to CIF files.
// read
f.read(ifstream{"1mve.cif"});
auto& e = f.firstDatablock();
cout << "ID of datablock: " << e.id() << endl;
auto& atoms = e["atom_site"];
for (auto& atom: atoms)
{
cout << atom["group_PDB"] << ", "
<< atom["id"] << ", "
...
Assumptions: a file contains one or more datablocks modelled by the class datablock.
Each datablock contains categories. These map to the original tables used to fill
the mmCIF file. Each Category can contain multiple Items, the columns in the table.
float x, y, z;
cif::tie(x, y, z) = atom.get("Cartn_x", "Cartn_y", "Cartn_z");
...
}
Values are stored as character strings internally.
Another way of querying a Category is by using this construct:
auto cat& = e["atom_site"];
auto Rows = cat.find(Key("label_asym_id") == "A" and Key("label_seq_id") == 1);
Synopsis:
// create a cif file
cif::datablock e("1MVE");
e.append(cif::Category{"_entry", { "id", "1MVE" } });
cif::Category atomSite("atom_site");
size_t nr{};
for (auto& myAtom: atoms)
{
atomSite.push_back({
{ "group_PDB", "ATOM" },
{ "id", ++nr },
{ "type_symbol", myAtom.type.str() },
...
});
}
e.append(move(atomSite));
cif::File f;
f.append(e);
ofstream os("1mve.cif");
f.write(os);
// read
f.read(ifstream{"1mve.cif"});
auto& e = f.firstDatablock();
cout << "ID of datablock: " << e.id() << endl;
auto& atoms = e["atom_site"];
for (auto& atom: atoms)
{
cout << atom["group_PDB"] << ", "
<< atom["id"] << ", "
...
float x, y, z;
cif::tie(x, y, z) = atom.get("Cartn_x", "Cartn_y", "Cartn_z");
...
}
Another way of querying a Category is by using this construct:
auto cat& = e["atom_site"];
auto Rows = cat.find(Key("label_asym_id") == "A" and Key("label_seq_id") == 1);
*/
@@ -147,12 +147,12 @@ class Item
Item(std::string_view name, char value)
: mName(name)
, mValue({ value })
, mValue({value})
{
}
template<typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
Item(std::string_view name, const T& value, const char *fmt)
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
Item(std::string_view name, const T &value, const char *fmt)
: mName(name)
, mValue((boost::format(fmt) % value).str())
{
@@ -176,6 +176,7 @@ class Item
, mValue(rhs.mValue)
{
}
Item(Item &&rhs) noexcept
: mName(std::move(rhs.mName))
, mValue(std::move(rhs.mValue))
@@ -253,8 +254,15 @@ class Datablock
const_iterator begin() const { return mCategories.begin(); }
const_iterator end() const { return mCategories.end(); }
/// \brief Access to the category with name \a name, will create it if it doesn't exist.
Category &operator[](std::string_view name);
// /// \brief Access to the category with name \a name, will throw if it doesn't exist.
// const Category &operator[](std::string_view name) const;
/// \brief Access to the category with name \a name, will return an empty category if is doesn't exist.
const Category &operator[](std::string_view name) const;
std::tuple<iterator, bool> emplace(std::string_view name);
bool isValid();
@@ -276,14 +284,17 @@ class Datablock
friend bool operator==(const Datablock &lhs, const Datablock &rhs);
friend std::ostream& operator<<(std::ostream &os, const Datablock &data);
friend std::ostream &operator<<(std::ostream &os, const Datablock &data);
private:
CategoryList mCategories; // LRU
CategoryList mCategories; // LRU
mutable std::shared_mutex mLock;
std::string mName;
const Validator *mValidator;
Datablock *mNext;
// for returning empty categories in the const operator[]
mutable std::unique_ptr<Category> mNullCategory;
};
// --------------------------------------------------------------------
@@ -1183,6 +1194,9 @@ struct Empty
{
};
/// \brief A helper to make it possible to have conditions like ("id"_key == cif::null)
inline constexpr Empty null = Empty();
struct Key
{
explicit Key(const std::string &itemTag)
@@ -1622,7 +1636,7 @@ class conditional_iterator_proxy
using iterator = conditional_iterator_impl;
using reference = typename iterator::reference;
template<typename... Ns>
template <typename... Ns>
conditional_iterator_proxy(CategoryType &cat, row_iterator pos, Condition &&cond, Ns... names);
conditional_iterator_proxy(conditional_iterator_proxy &&p);
@@ -1859,6 +1873,9 @@ class Category
Row front() { return Row(mHead); }
Row back() { return Row(mTail); }
const Row front() const { return Row(mHead); }
const Row back() const { return Row(mTail); }
Row operator[](Condition &&cond);
const Row operator[](Condition &&cond) const;
@@ -1893,7 +1910,7 @@ class Category
conditional_iterator_proxy<const Category> find(const_iterator pos, Condition &&cond) const
{
return conditional_iterator_proxy<const Category>{const_cast<Category&>(*this), pos, std::forward<Condition>(cond)};
return conditional_iterator_proxy<const Category>{const_cast<Category &>(*this), pos, std::forward<Condition>(cond)};
}
template <typename... Ts, typename... Ns>
@@ -1914,14 +1931,14 @@ class Category
conditional_iterator_proxy<Category, Ts...> find(const_iterator pos, Condition &&cond, Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)... };
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)...};
}
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<const Category, Ts...> find(const_iterator pos, Condition &&cond, Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)... };
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)...};
}
// --------------------------------------------------------------------
@@ -1964,13 +1981,13 @@ class Category
}
template <typename T>
T find1(Condition &&cond, const char* column) const
T find1(Condition &&cond, const char *column) const
{
return find1<T>(cbegin(), std::forward<Condition>(cond), column);
}
template <typename T>
T find1(const_iterator pos, Condition &&cond, const char* column) const
T find1(const_iterator pos, Condition &&cond, const char *column) const
{
auto h = find<T>(pos, std::forward<Condition>(cond), column);
@@ -2169,17 +2186,21 @@ class File
File();
File(std::istream &is, bool validate = false);
File(const std::filesystem::path &path, bool validate = false);
File(const char *data, size_t length); // good luck trying to find out what it is...
File(File &&rhs);
File(const File &rhs) = delete;
File &operator=(const File &rhs) = delete;
~File();
virtual ~File();
void load(const std::filesystem::path &p);
void save(const std::filesystem::path &p);
virtual void load(const std::filesystem::path &p);
virtual void save(const std::filesystem::path &p);
void load(std::istream &is);
void save(std::ostream &os);
virtual void load(std::istream &is);
virtual void save(std::ostream &os);
virtual void load(const char *data, std::size_t length);
/// \brief Load only the data block \a datablock from the mmCIF file
void load(std::istream &is, const std::string &datablock);
@@ -2211,6 +2232,12 @@ class File
void append(Datablock *e);
Datablock &emplace(std::string_view name)
{
append(new Datablock(name));
return back();
}
Datablock *get(std::string_view name) const;
Datablock &operator[](std::string_view name);
@@ -2248,6 +2275,9 @@ class File
iterator begin() const;
iterator end() const;
Datablock &front();
Datablock &back();
bool empty() const { return mHead == nullptr; }
const Validator &getValidator() const;

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -28,12 +28,12 @@
#include "cif++/Cif++.hpp"
void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile);
void WritePDBFile(std::ostream &os, const cif::Datablock &data);
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
void WritePDBHeaderLines(std::ostream& os, cif::File& cifFile);
void WritePDBHeaderLines(std::ostream &os, const cif::Datablock &data);
std::string GetPDBHEADERLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBCOMPNDLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBSOURCELine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBAUTHORLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBHEADERLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
std::string GetPDBCOMPNDLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
std::string GetPDBSOURCELine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
std::string GetPDBAUTHORLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -82,16 +82,7 @@ inline bool isAnyPrint(int ch)
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
}
inline bool isUnquotedString(const char *s)
{
bool result = isOrdinary(*s++);
while (result and *s != 0)
{
result = isNonBlank(*s);
++s;
}
return result;
}
bool isUnquotedString(const char *s);
// --------------------------------------------------------------------
@@ -139,7 +130,7 @@ class SacParser
int getNextChar();
void retract();
void restart();
int restart(int start);
CIFToken getNextToken();
void match(CIFToken token);
@@ -181,8 +172,9 @@ class SacParser
eStateTextField,
eStateFloat = 100,
eStateInt = 110,
// eStateNumericSuffix = 200,
eStateValue = 300
eStateValue = 300,
eStateDATA,
eStateSAVE
};
std::istream &mData;
@@ -191,7 +183,6 @@ class SacParser
bool mValidate;
uint32_t mLineNr;
bool mBol;
int mState, mStart;
CIFToken mLookahead;
std::string mTokenValue;
CIFValueType mTokenType;

View File

@@ -26,11 +26,7 @@
#pragma once
#include <cassert>
#include <filesystem>
#include <iostream>
#include <list>
#include <memory>
#include <set>
#include <vector>

View File

@@ -221,7 +221,7 @@ struct PointF
FType length() const
{
return sqrt(mX * mX + mY * mY + mZ * mZ);
return std::sqrt(mX * mX + mY * mY + mZ * mZ);
}
};
@@ -285,7 +285,7 @@ inline double DistanceSquared(const PointF<F> &a, const PointF<F> &b)
template <typename F>
inline double Distance(const PointF<F> &a, const PointF<F> &b)
{
return sqrt(
return std::sqrt(
(a.mX - b.mX) * (a.mX - b.mX) +
(a.mY - b.mY) * (a.mY - b.mY) +
(a.mZ - b.mZ) * (a.mZ - b.mZ));
@@ -332,10 +332,10 @@ double DihedralAngle(const PointF<F> &p1, const PointF<F> &p2, const PointF<F> &
double result = 360;
if (u > 0 and v > 0)
{
u = DotProduct(p, x) / sqrt(u);
v = DotProduct(p, y) / sqrt(v);
u = DotProduct(p, x) / std::sqrt(u);
v = DotProduct(p, y) / std::sqrt(v);
if (u != 0 or v != 0)
result = atan2(v, u) * 180 / kPI;
result = std::atan2(v, u) * 180 / kPI;
}
return result;
@@ -351,7 +351,7 @@ double CosinusAngle(const PointF<F> &p1, const PointF<F> &p2, const PointF<F> &p
double x = DotProduct(v12, v12) * DotProduct(v34, v34);
if (x > 0)
result = DotProduct(v12, v34) / sqrt(x);
result = DotProduct(v12, v34) / std::sqrt(x);
return result;
}
@@ -371,15 +371,16 @@ auto DistancePointToLine(const PointF<F> &l1, const PointF<F> &l2, const PointF<
// a random direction with a distance randomly chosen from a normal
// distribution with a stddev of offset.
template <typename F>
PointF<F> Nudge(PointF<F> p, F offset);
Point Nudge(Point p, float offset);
// --------------------------------------------------------------------
// We use quaternions to do rotations in 3d space
Quaternion Normalize(Quaternion q);
Quaternion ConstructFromAngleAxis(float angle, Point axis);
std::tuple<double, Point> QuaternionToAngleAxis(Quaternion q);
Point Centroid(const std::vector<Point> &Points);
Point CenterPoints(std::vector<Point> &Points);
@@ -437,9 +438,9 @@ class SphericalDots
double lat = std::asin((2.0 * i) / P);
double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
p->mX = sin(lon) * cos(lat);
p->mY = cos(lon) * cos(lat);
p->mZ = sin(lat);
p->mX = std::sin(lon) * std::cos(lat);
p->mY = std::cos(lon) * std::cos(lat);
p->mZ = std::sin(lat);
++p;
}

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -34,16 +34,16 @@
#include "cif++/Point.hpp"
/*
To modify a structure, you will have to use actions.
The currently supported actions are:
To modify a structure, you will have to use actions.
The currently supported actions are:
// - Move atom to new location
- Remove atom
- Remove atom
// - Add new atom that was formerly missing
// - Add alternate Residue
-
-
*/
namespace mmcif
@@ -61,7 +61,6 @@ class File;
class Atom
{
private:
struct AtomImpl : public std::enable_shared_from_this<AtomImpl>
{
AtomImpl(cif::Datablock &db, const std::string &id, cif::Row row);
@@ -77,9 +76,11 @@ class Atom
bool getAnisoU(float anisou[6]) const;
int charge() const;
void moveTo(const Point &p);
const Compound &comp() const;
const Compound *compound() const;
const std::string get_property(const std::string_view name) const;
void set_property(const std::string_view name, const std::string &value);
@@ -99,7 +100,7 @@ class Atom
int mRefcount;
cif::Row mRow;
mutable std::vector<std::tuple<std::string,cif::detail::ItemReference>> mCachedRefs;
mutable std::vector<std::tuple<std::string, cif::detail::ItemReference>> mCachedRefs;
mutable const Compound *mCompound = nullptr;
@@ -110,14 +111,17 @@ class Atom
};
public:
Atom() {}
Atom(std::shared_ptr<AtomImpl> impl)
: mImpl(impl) {}
: mImpl(impl)
{
}
Atom(const Atom &rhs)
: mImpl(rhs.mImpl) {}
: mImpl(rhs.mImpl)
{
}
Atom(cif::Datablock &db, cif::Row &row);
@@ -141,6 +145,8 @@ class Atom
void set_property(const std::string_view name, const std::string &value)
{
if (not mImpl)
throw std::logic_error("Error trying to modify an uninitialized atom");
mImpl->set_property(name, value);
}
@@ -150,11 +156,16 @@ class Atom
set_property(name, std::to_string(value));
}
const std::string &id() const { return mImpl->mID; }
AtomType type() const { return mImpl->mType; }
const std::string &id() const { return impl().mID; }
AtomType type() const { return impl().mType; }
Point location() const { return mImpl->mLocation; }
void location(Point p) { mImpl->moveTo(p); }
Point location() const { return impl().mLocation; }
void location(Point p)
{
if (not mImpl)
throw std::logic_error("Error trying to modify an uninitialized atom");
mImpl->moveTo(p);
}
/// \brief Translate the position of this atom by \a t
void translate(Point t);
@@ -169,33 +180,33 @@ class Atom
void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);
// for direct access to underlying data, be careful!
const cif::Row getRow() const { return mImpl->mRow; }
const cif::Row getRow() const { return impl().mRow; }
const cif::Row getRowAniso() const;
bool isSymmetryCopy() const { return mImpl->mSymmetryCopy; }
std::string symmetry() const { return mImpl->mSymmetryOperator; }
bool isSymmetryCopy() const { return impl().mSymmetryCopy; }
std::string symmetry() const { return impl().mSymmetryOperator; }
const Compound &comp() const { return mImpl->comp(); }
bool isWater() const { return mImpl->mCompID == "HOH" or mImpl->mCompID == "H2O" or mImpl->mCompID == "WAT"; }
const Compound &compound() const;
bool isWater() const { return impl().mCompID == "HOH" or impl().mCompID == "H2O" or impl().mCompID == "WAT"; }
int charge() const;
float uIso() const;
bool getAnisoU(float anisou[6]) const { return mImpl->getAnisoU(anisou); }
bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }
float occupancy() const;
// specifications
const std::string& labelAtomID() const { return mImpl->mAtomID; }
const std::string& labelCompID() const { return mImpl->mCompID; }
const std::string& labelAsymID() const { return mImpl->mAsymID; }
const std::string &labelAtomID() const { return impl().mAtomID; }
const std::string &labelCompID() const { return impl().mCompID; }
const std::string &labelAsymID() const { return impl().mAsymID; }
std::string labelEntityID() const;
int labelSeqID() const { return mImpl->mSeqID; }
const std::string& labelAltID() const { return mImpl->mAltID; }
bool isAlternate() const { return not mImpl->mAltID.empty(); }
int labelSeqID() const { return impl().mSeqID; }
const std::string &labelAltID() const { return impl().mAltID; }
bool isAlternate() const { return not impl().mAltID.empty(); }
std::string authAtomID() const;
std::string authCompID() const;
std::string authAsymID() const;
const std::string& authSeqID() const { return mImpl->mAuthSeqID; }
const std::string &authSeqID() const { return impl().mAuthSeqID; }
std::string pdbxAuthInsCode() const;
std::string pdbxAuthAltID() const;
@@ -203,6 +214,10 @@ class Atom
std::string pdbID() const; // auth_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code
bool operator==(const Atom &rhs) const;
bool operator!=(const Atom &rhs) const
{
return not operator==(rhs);
}
// access data in compound for this atom
@@ -218,7 +233,7 @@ class Atom
std::swap(mImpl, b.mImpl);
}
int compare(const Atom &b) const { return mImpl->compare(*b.mImpl); }
int compare(const Atom &b) const { return impl().compare(*b.mImpl); }
bool operator<(const Atom &rhs) const
{
@@ -227,10 +242,22 @@ class Atom
friend std::ostream &operator<<(std::ostream &os, const Atom &atom);
/// \brief Synchronize data with underlying cif data
void sync()
{
if (mImpl)
mImpl->prefetch();
}
private:
friend class Structure;
void setID(int id);
const AtomImpl &impl() const
{
if (not mImpl)
throw std::runtime_error("Uninitialized atom, not found?");
return *mImpl;
}
std::shared_ptr<AtomImpl> mImpl;
};
@@ -238,20 +265,20 @@ class Atom
template <>
inline std::string Atom::get_property<std::string>(const std::string_view name) const
{
return mImpl->get_property(name);
return impl().get_property(name);
}
template <>
inline int Atom::get_property<int>(const std::string_view name) const
{
auto v = mImpl->get_property(name);
auto v = impl().get_property(name);
return v.empty() ? 0 : stoi(v);
}
template <>
inline float Atom::get_property<float>(const std::string_view name) const
{
return stof(mImpl->get_property(name));
return stof(impl().get_property(name));
}
inline void swap(mmcif::Atom &a, mmcif::Atom &b)
@@ -273,12 +300,19 @@ typedef std::vector<Atom> AtomView;
// --------------------------------------------------------------------
enum class EntityType
{
Polymer, NonPolymer, Macrolide, Water, Branched
};
// --------------------------------------------------------------------
class Residue
{
public:
// constructor
Residue(const Structure &structure, const std::string &compoundID,
const std::string &asymID, int seqID = 0, const std::string &authSeqID = {})
const std::string &asymID, int seqID, const std::string &authSeqID)
: mStructure(&structure)
, mCompoundID(compoundID)
, mAsymID(asymID)
@@ -296,12 +330,11 @@ class Residue
virtual ~Residue();
const Compound &compound() const;
AtomView &atoms();
const AtomView &atoms() const;
void addAtom(const Atom &atom)
{
mAtoms.push_back(atom);
}
void addAtom(Atom &atom);
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
AtomView unique_atoms() const;
@@ -318,6 +351,8 @@ class Residue
int seqID() const { return mSeqID; }
std::string entityID() const;
EntityType entityType() const;
std::string authAsymID() const;
std::string authSeqID() const;
std::string authInsCode() const;
@@ -355,6 +390,16 @@ class Residue
friend Structure;
bool operator==(const mmcif::Residue &rhs) const
{
return this == &rhs or (
mStructure == rhs.mStructure and
mSeqID == rhs.mSeqID and
mAsymID == rhs.mAsymID and
mCompoundID == rhs.mCompoundID and
mAuthSeqID == rhs.mAuthSeqID);
}
protected:
Residue() {}
@@ -363,9 +408,6 @@ class Residue
const Structure *mStructure = nullptr;
std::string mCompoundID, mAsymID;
int mSeqID = 0;
// Watch out, this is used only to label waters... The rest of the code relies on
// MapLabelToAuth to get this info. Perhaps we should rename this member field.
std::string mAuthSeqID;
AtomView mAtoms;
};
@@ -432,6 +474,11 @@ class Monomer : public Residue
// for LEU and VAL
float chiralVolume() const;
bool operator==(const Monomer &rhs) const
{
return mPolymer == rhs.mPolymer and mIndex == rhs.mIndex;
}
private:
const Polymer *mPolymer;
size_t mIndex;
@@ -469,35 +516,95 @@ class Polymer : public std::vector<Monomer>
cif::RowSet mPolySeq;
};
// --------------------------------------------------------------------
// Sugar and Branch, to describe glycosylation sites
class Branch;
class Sugar : public Residue
{
public:
Sugar(const Branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID);
Sugar(Sugar &&rhs);
Sugar &operator=(Sugar &&rhs);
int num() const { return std::stoi(mAuthSeqID); }
std::string name() const;
/// \brief Return the atom the C1 is linked to
Atom getLink() const { return mLink; }
void setLink(Atom link) { mLink = link; }
size_t getLinkNr() const
{
return mLink ? std::stoi(mLink.authSeqID()) : 0;
}
private:
const Branch *mBranch;
Atom mLink;
};
class Branch : public std::vector<Sugar>
{
public:
Branch(Structure &structure, const std::string &asymID);
void linkAtoms();
std::string name() const;
float weight() const;
std::string asymID() const { return mAsymID; }
Structure &structure() { return *mStructure; }
const Structure &structure() const { return *mStructure; }
Sugar &getSugarByNum(int nr);
const Sugar &getSugarByNum(int nr) const;
private:
friend Sugar;
std::string name(const Sugar &s) const;
Structure *mStructure;
std::string mAsymID;
};
// --------------------------------------------------------------------
// file is a reference to the data stored in e.g. the cif file.
// This object is not copyable.
class File : public std::enable_shared_from_this<File>
class File : public cif::File
{
public:
File();
File(const std::filesystem::path &path);
File(const char *data, size_t length); // good luck trying to find out what it is...
~File();
File() {}
File(const std::filesystem::path &path)
{
load(path);
}
File(const char *data, size_t length)
{
load(data, length);
}
File(const File &) = delete;
File &operator=(const File &) = delete;
cif::Datablock& createDatablock(const std::string_view name);
void load(const std::filesystem::path &p) override;
void save(const std::filesystem::path &p) override;
void load(const std::filesystem::path &path);
void save(const std::filesystem::path &path);
void load(std::istream &is) override;
Structure *model(size_t nr = 1);
using cif::File::load;
using cif::File::save;
struct FileImpl &impl() const { return *mImpl; }
cif::Datablock &data();
cif::File &file();
private:
struct FileImpl *mImpl;
cif::Datablock &data() { return front(); }
};
// --------------------------------------------------------------------
@@ -517,25 +624,50 @@ inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
class Structure
{
public:
Structure(File &p, size_t modelNr = 1, StructureOpenOptions options = {});
Structure &operator=(const Structure &) = delete;
~Structure();
Structure(cif::File &p, size_t modelNr = 1, StructureOpenOptions options = {})
: Structure(p.front(), modelNr, options)
{
}
Structure(cif::Datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
Structure(Structure &&s) = default;
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
Structure(const Structure &);
File &getFile() const;
Structure &operator=(const Structure &) = delete;
// Structure &operator=(Structure &&s) = default;
~Structure();
const AtomView &atoms() const { return mAtoms; }
// AtomView &atoms() { return mAtoms; }
EntityType getEntityTypeForEntityID(const std::string entityID) const;
EntityType getEntityTypeForAsymID(const std::string asymID) const;
AtomView waters() const;
const std::list<Polymer> &polymers() const { return mPolymers; }
std::list<Polymer> &polymers() { return mPolymers; }
const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }
const std::vector<Residue> &branchResidues() const { return mBranchResidues; }
Polymer &getPolymerByAsymID(const std::string &asymID);
Atom getAtomByID(std::string id) const;
const Polymer &getPolymerByAsymID(const std::string &asymID) const
{
return const_cast<Structure *>(this)->getPolymerByAsymID(asymID);
}
const std::list<Branch> &branches() const { return mBranches; }
std::list<Branch> &branches() { return mBranches; }
Branch &getBranchByAsymID(const std::string &asymID);
const Branch &getBranchByAsymID(const std::string &asymID) const;
const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }
Atom getAtomByID(const std::string &id) const;
// Atom getAtomByLocation(Point pt, float maxDistance) const;
Atom getAtomByLabel(const std::string &atomID, const std::string &asymID,
@@ -547,45 +679,68 @@ class Structure
/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
Atom getAtomByPositionAndType(Point p, std::string_view type, std::string_view res_type) const;
/// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID = 0) const;
/// \brief Get a non-poly residue for an asym with id \a asymID
Residue &getResidue(const std::string &asymID)
{
return getResidue(asymID, 0, "");
}
/// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID = 0);
/// \brief Get a non-poly residue for an asym with id \a asymID
const Residue &getResidue(const std::string &asymID) const
{
return getResidue(asymID, 0, "");
}
/// \brief Get a the single residue for an asym with id \a asymID
const Residue &getResidue(const std::string &asymID) const;
/// \brief Get a residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID);
/// \brief Get a the single residue for an asym with id \a asymID
Residue &getResidue(const std::string &asymID);
/// \brief Get a the single residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
const Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID) const
{
return const_cast<Structure *>(this)->getResidue(asymID, seqID, authSeqID);
}
// map between auth and label locations
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID);
std::tuple<std::string, int, std::string> MapAuthToLabel(const std::string &asymID,
const std::string &seqID, const std::string &compID, const std::string &insCode = "");
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID) const
{
return const_cast<Structure *>(this)->getResidue(asymID, compID, seqID, authSeqID);
}
std::tuple<std::string, std::string, std::string, std::string> MapLabelToAuth(
const std::string &asymID, int seqID, const std::string &compID);
/// \brief Get a the residue for atom \a atom
Residue &getResidue(const mmcif::Atom &atom)
{
return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID(), atom.authSeqID());
}
// returns chain, seqnr, icode
std::tuple<char, int, char> MapLabelToAuth(
const std::string &asymID, int seqID) const;
// returns chain,seqnr,comp,iCode
std::tuple<std::string, int, std::string, std::string> MapLabelToPDB(
const std::string &asymID, int seqID, const std::string &compID,
const std::string &authSeqID) const;
std::tuple<std::string, int, std::string> MapPDBToLabel(
const std::string &asymID, int seqID, const std::string &compID, const std::string &iCode) const;
/// \brief Get a the residue for atom \a atom
const Residue &getResidue(const mmcif::Atom &atom) const
{
return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID(), atom.authSeqID());
}
// Actions
void removeAtom(Atom &a);
void swapAtoms(Atom &a1, Atom &a2); // swap the labels for these atoms
void moveAtom(Atom &a, Point p); // move atom to a new location
void removeAtom(Atom &a)
{
removeAtom(a, true);
}
void swapAtoms(Atom a1, Atom a2); // swap the labels for these atoms
void moveAtom(Atom a, Point p); // move atom to a new location
void changeResidue(Residue &res, const std::string &newCompound,
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
/// \brief Remove a residue, can be monomer or nonpoly
///
/// \param asym_id The asym ID
/// \param seq_id The sequence ID
void removeResidue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
{
removeResidue(getResidue(asym_id, seq_id, auth_seq_id));
}
/// \brief Create a new non-polymer entity, returns new ID
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
/// \return The ID of the created entity
@@ -599,9 +754,33 @@ class Structure
/// \return The newly create asym ID
std::string createNonpoly(const std::string &entity_id, const std::vector<mmcif::Atom> &atoms);
/// \brief To sort the atoms in order of model > asym-id > res-id > atom-id
/// Will asssign new atom_id's to all atoms. Be carefull
void sortAtoms();
/// \brief Create a new NonPolymer struct_asym with atoms constructed from info in \a atom_info, returns asym_id.
/// This method creates new atom records filled with info from the info.
///
/// \param entity_id The entity ID of the new nonpoly
/// \param atoms The array of sets of cif::item data containing the data for the atoms.
/// \return The newly create asym ID
std::string createNonpoly(const std::string &entity_id, std::vector<std::vector<cif::Item>> &atom_info);
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a nag_atom_info
Branch &createBranch(std::vector<std::vector<cif::Item>> &nag_atom_info);
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info
///
/// \param asym_id The asym id of the branch to extend
/// \param atom_info Array containing the info for the atoms to construct for the new sugar
/// \param link_sugar The sugar to link to, note: this is the sugar number (1 based)
/// \param link_atom The atom id of the atom linked in the sugar
Branch &extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info,
int link_sugar, const std::string &link_atom);
/// \brief Remove \a branch
void removeBranch(Branch &branch);
/// \brief Remove residue \a res
///
/// \param res The residue to remove
void removeResidue(mmcif::Residue &res);
/// \brief Translate the coordinates of all atoms in the structure by \a t
void translate(Point t);
@@ -616,33 +795,52 @@ class Structure
void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);
const std::vector<Residue> &getNonPolymers() const { return mNonPolymers; }
const std::vector<Residue> &getBranchResidues() const { return mBranchResidues; }
void cleanupEmptyCategories();
/// \brief Direct access to underlying data
cif::Category &category(std::string_view name) const;
cif::Datablock &datablock() const;
cif::Category &category(std::string_view name) const
{
return mDb[name];
}
cif::Datablock &datablock() const
{
return mDb;
}
void validateAtoms() const;
private:
friend Polymer;
friend Residue;
// friend residue_view;
// friend residue_iterator;
std::string insertCompound(const std::string &compoundID, bool isEntity);
std::string createEntityForBranch(Branch &branch);
void loadData();
void updateAtomIndex();
void loadAtomsForModel(StructureOpenOptions options);
File &mFile;
template<typename... Args>
Atom& emplace_atom(Args ...args)
{
return emplace_atom(Atom{std::forward<Args>(args)...});
}
Atom &emplace_atom(Atom &&atom);
void removeAtom(Atom &a, bool removeFromResidue);
void removeSugar(Sugar &sugar);
cif::Datablock &mDb;
size_t mModelNr;
AtomView mAtoms;
std::vector<size_t> mAtomIndex;
std::list<Polymer> mPolymers;
std::vector<Residue> mNonPolymers, mBranchResidues;
std::list<Branch> mBranches;
std::vector<Residue> mNonPolymers;
};
} // namespace mmcif

View File

@@ -159,11 +159,221 @@ const AtomTypeInfo kKnownAtoms[] =
{ Ts, "Tennessine", "Ts", 294, true, { kNA, kNA, kNA, 165, kNA, kNA, kNA } }, // 117 Ts Tenness­ine
{ Og, "Oganesson", "Og", 294, true, { kNA, kNA, kNA, 157, kNA, kNA, kNA } }, // 118 Og Oga­nesson
{ D, "Deuterium", "D", 2.014f, false, { 53, 25, 37, 32, kNA, kNA, 120 } }, // 1 D Deuterium
{ D, "Deuterium", "D", 2.014f, false, { 53, 25, 37, 32, kNA, kNA, 120 } }, // 1 D Deuterium
};
uint32_t kKnownAtomsCount = sizeof(kKnownAtoms) / sizeof(AtomTypeInfo);
// --------------------------------------------------------------------
// Crystal ionic radii, as taken from Wikipedia (https://en.m.wikipedia.org/wiki/Ionic_radius)
const struct IonicRadii
{
AtomType type;
float radii[11];
} kCrystalIonicRadii[] = {
{ H, { kNA, kNA, 208, -4, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Hydrogen
{ Li, { kNA, kNA, kNA, 90, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Lithium
{ Be, { kNA, kNA, kNA, kNA, 59, kNA, kNA, kNA, kNA, kNA, kNA } }, // Beryllium
{ B, { kNA, kNA, kNA, kNA, kNA, 41, kNA, kNA, kNA, kNA, kNA } }, // Boron
{ C, { kNA, kNA, kNA, kNA, kNA, kNA, 30, kNA, kNA, kNA, kNA } }, // Carbon
{ N, { 132, kNA, kNA, kNA, kNA, 30, kNA, 27, kNA, kNA, kNA } }, // Nitrogen
{ O, { kNA, 126, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Oxygen
{ F, { kNA, kNA, 119, kNA, kNA, kNA, kNA, kNA, kNA, 22, kNA } }, // Fluorine
{ Na, { kNA, kNA, kNA, 116, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Sodium
{ Mg, { kNA, kNA, kNA, kNA, 86, kNA, kNA, kNA, kNA, kNA, kNA } }, // Magnesium
{ Al, { kNA, kNA, kNA, kNA, kNA, 67.5, kNA, kNA, kNA, kNA, kNA } }, // Aluminium
{ Si, { kNA, kNA, kNA, kNA, kNA, kNA, 54, kNA, kNA, kNA, kNA } }, // Silicon
{ P, { kNA, kNA, kNA, kNA, kNA, 58, kNA, 52, kNA, kNA, kNA } }, // Phosphorus
{ S, { kNA, 170, kNA, kNA, kNA, kNA, 51, kNA, 43, kNA, kNA } }, // Sulfur
{ Cl, { kNA, kNA, 181, kNA, kNA, kNA, kNA, 26, kNA, 41, kNA } }, // Chlorine
{ K, { kNA, kNA, kNA, 152, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Potassium
{ Ca, { kNA, kNA, kNA, kNA, 114, kNA, kNA, kNA, kNA, kNA, kNA } }, // Calcium
{ Sc, { kNA, kNA, kNA, kNA, kNA, 88.5, kNA, kNA, kNA, kNA, kNA } }, // Scandium
{ Ti, { kNA, kNA, kNA, kNA, 100, 81, 74.5, kNA, kNA, kNA, kNA } }, // Titanium
{ V, { kNA, kNA, kNA, kNA, 93, 78, 72, 68, kNA, kNA, kNA } }, // Vanadium
{ Cr, { kNA, kNA, kNA, kNA, 87, 75.5, 69, 63, 58, kNA, kNA } }, // Chromium ls
// { Cr,{ kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA, kNA, kNA } }, // Chromium hs
{ Mn, { kNA, kNA, kNA, kNA, 81, 72, 67, 47, 39.5, 60, kNA } }, // Manganese ls
// { Mn,{ kNA, kNA, kNA, kNA, 97, 78.5, kNA, kNA, kNA, kNA, kNA } }, // Manganese hs
{ Fe, { kNA, kNA, kNA, kNA, 75, 69, 72.5, kNA, 39, kNA, kNA } }, // Iron ls
// { Fe,{ kNA, kNA, kNA, kNA, 92, 78.5, kNA, kNA, kNA, kNA, kNA } }, // Iron hs
{ Co, { kNA, kNA, kNA, kNA, 79, 68.5, kNA, kNA, kNA, kNA, kNA } }, // Cobalt ls
// { Co,{ kNA, kNA, kNA, kNA, 88.5, 75, 67, kNA, kNA, kNA, kNA } }, // Cobalt hs
{ Ni, { kNA, kNA, kNA, kNA, 83, 70, 62, kNA, kNA, kNA, kNA } }, // Nickel ls
// { Ni,{ kNA, kNA, kNA, kNA, kNA, 74, kNA, kNA, kNA, kNA, kNA } }, // Nickel hs
{ Cu, { kNA, kNA, kNA, 91, 87, 68, kNA, kNA, kNA, kNA, kNA } }, // Copper
{ Zn, { kNA, kNA, kNA, kNA, 88 , kNA, kNA, kNA, kNA, kNA, kNA } }, // Zinc
{ Ga, { kNA, kNA, kNA, kNA, kNA, 76, kNA, kNA, kNA, kNA, kNA } }, // Gallium
{ Ge, { kNA, kNA, kNA, kNA, 87, kNA, 67, kNA, kNA, kNA, kNA } }, // Germanium
{ As, { kNA, kNA, kNA, kNA, kNA, 72, kNA, 60, kNA, kNA, kNA } }, // Arsenic
{ Se, { kNA, 184, kNA, kNA, kNA, kNA, 64, kNA, 56, kNA, kNA } }, // Selenium
{ Br, { kNA, kNA, 182, kNA, kNA, 73, kNA, 45, kNA, 53, kNA } }, // Bromine
{ Rb, { kNA, kNA, kNA, 166, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Rubidium
{ Sr, { kNA, kNA, kNA, kNA, 132, kNA, kNA, kNA, kNA, kNA, kNA } }, // Strontium
{ Y, { kNA, kNA, kNA, kNA, kNA, 104, kNA, kNA, kNA, kNA, kNA } }, // Yttrium
{ Zr, { kNA, kNA, kNA, kNA, kNA, kNA, 86, kNA, kNA, kNA, kNA } }, // Zirconium
{ Nb, { kNA, kNA, kNA, kNA, kNA, 86, 82, 78, kNA, kNA, kNA } }, // Niobium
{ Mo, { kNA, kNA, kNA, kNA, kNA, 83, 79, 75, 73, kNA, kNA } }, // Molybdenum
{ Tc, { kNA, kNA, kNA, kNA, kNA, kNA, 78.5, 74, kNA, 70, kNA } }, // Technetium
{ Ru, { kNA, kNA, kNA, kNA, kNA, 82, 76, 70.5, kNA, 52, 150 } }, // Ruthenium
{ Rh, { kNA, kNA, kNA, kNA, kNA, 80.5, 74, 69, kNA, kNA, kNA } }, // Rhodium
{ Pd, { kNA, kNA, kNA, 73, 100, 90, 75.5, kNA, kNA, kNA, kNA } }, // Palladium
{ Ag, { kNA, kNA, kNA, 129, 108, 89, kNA, kNA, kNA, kNA, kNA } }, // Silver
{ Cd, { kNA, kNA, kNA, kNA, 109, kNA, kNA, kNA, kNA, kNA, kNA } }, // Cadmium
{ In, { kNA, kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA, kNA } }, // Indium
{ Sn, { kNA, kNA, kNA, kNA, kNA, kNA, 83, kNA, kNA, kNA, kNA } }, // Tin
{ Sb, { kNA, kNA, kNA, kNA, kNA, 90, kNA, 74, kNA, kNA, kNA } }, // Antimony
{ Te, { kNA, 207, kNA, kNA, kNA, kNA, 111, kNA, 70, kNA, kNA } }, // Tellurium
{ I, { kNA, kNA, 206, kNA, kNA, kNA, kNA, 109, kNA, 67, kNA } }, // Iodine
{ Xe, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 62 } }, // Xenon
{ Cs, { kNA, kNA, kNA, 167, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Caesium
{ Ba, { kNA, kNA, kNA, kNA, 149, kNA, kNA, kNA, kNA, kNA, kNA } }, // Barium
{ La, { kNA, kNA, kNA, kNA, kNA, 117.2, kNA, kNA, kNA, kNA, kNA } }, // Lanthanum
{ Ce, { kNA, kNA, kNA, kNA, kNA, 115, 101, kNA, kNA, kNA, kNA } }, // Cerium
{ Pr, { kNA, kNA, kNA, kNA, kNA, 113, 99, kNA, kNA, kNA, kNA } }, // Praseodymium
{ Nd, { kNA, kNA, kNA, kNA, 143, 112.3, kNA, kNA, kNA, kNA, kNA } }, // Neodymium
{ Pm, { kNA, kNA, kNA, kNA, kNA, 111, kNA, kNA, kNA, kNA, kNA } }, // Promethium
{ Sm, { kNA, kNA, kNA, kNA, 136, 109.8, kNA, kNA, kNA, kNA, kNA } }, // Samarium
{ Eu, { kNA, kNA, kNA, kNA, 131, 108.7, kNA, kNA, kNA, kNA, kNA } }, // Europium
{ Gd, { kNA, kNA, kNA, kNA, kNA, 107.8, kNA, kNA, kNA, kNA, kNA } }, // Gadolinium
{ Tb, { kNA, kNA, kNA, kNA, kNA, 106.3, 90, kNA, kNA, kNA, kNA } }, // Terbium
{ Dy, { kNA, kNA, kNA, kNA, 121, 105.2, kNA, kNA, kNA, kNA, kNA } }, // Dysprosium
{ Ho, { kNA, kNA, kNA, kNA, kNA, 104.1, kNA, kNA, kNA, kNA, kNA } }, // Holmium
{ Er, { kNA, kNA, kNA, kNA, kNA, 103, kNA, kNA, kNA, kNA, kNA } }, // Erbium
{ Tm, { kNA, kNA, kNA, kNA, 117, 102, kNA, kNA, kNA, kNA, kNA } }, // Thulium
{ Yb, { kNA, kNA, kNA, kNA, 116, 100.8, kNA, kNA, kNA, kNA, kNA } }, // Ytterbium
{ Lu, { kNA, kNA, kNA, kNA, kNA, 100.1, kNA, kNA, kNA, kNA, kNA } }, // Lutetium
{ Hf, { kNA, kNA, kNA, kNA, kNA, kNA, 85, kNA, kNA, kNA, kNA } }, // Hafnium
{ Ta, { kNA, kNA, kNA, kNA, kNA, 86, 82, 78, kNA, kNA, kNA } }, // Tantalum
{ W, { kNA, kNA, kNA, kNA, kNA, kNA, 80, 76, 74, kNA, kNA } }, // Tungsten
{ Re, { kNA, kNA, kNA, kNA, kNA, kNA, 77, 72, 69, 67, kNA } }, // Rhenium
{ Os, { kNA, kNA, kNA, kNA, kNA, kNA, 77, 71.5, 68.5, 66.5, 53 } }, // Osmium
{ Ir, { kNA, kNA, kNA, kNA, kNA, 82, 76.5, 71, kNA, kNA, kNA } }, // Iridium
{ Pt, { kNA, kNA, kNA, kNA, 94, kNA, 76.5, 71, kNA, kNA, kNA } }, // Platinum
{ Au, { kNA, kNA, kNA, 151, kNA, 99, kNA, 71, kNA, kNA, kNA } }, // Gold
{ Hg, { kNA, kNA, kNA, 133, 116, kNA, kNA, kNA, kNA, kNA, kNA } }, // Mercury
{ Tl, { kNA, kNA, kNA, 164, kNA, 102.5, kNA, kNA, kNA, kNA, kNA } }, // Thallium
{ Pb, { kNA, kNA, kNA, kNA, 133, kNA, 91.5, kNA, kNA, kNA, kNA } }, // Lead
{ Bi, { kNA, kNA, kNA, kNA, kNA, 117, kNA, 90, kNA, kNA, kNA } }, // Bismuth
{ Po, { kNA, kNA, kNA, kNA, kNA, kNA, 108, kNA, 81, kNA, kNA } }, // Polonium
{ At, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 76, kNA } }, // Astatine
{ Fr, { kNA, kNA, kNA, 194, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Francium
{ Ra, { kNA, kNA, kNA, kNA, 162, kNA, kNA, kNA, kNA, kNA, kNA } }, // Radium
{ Ac, { kNA, kNA, kNA, kNA, kNA, 126, kNA, kNA, kNA, kNA, kNA } }, // Actinium
{ Th, { kNA, kNA, kNA, kNA, kNA, kNA, 108, kNA, kNA, kNA, kNA } }, // Thorium
{ Pa, { kNA, kNA, kNA, kNA, kNA, 116, 104, 92, kNA, kNA, kNA } }, // Protactinium
{ U, { kNA, kNA, kNA, kNA, kNA, 116.5, 103, 90, 87, kNA, kNA } }, // Uranium
{ Np, { kNA, kNA, kNA, kNA, 124, 115, 101, 89, 86, 85, kNA } }, // Neptunium
{ Pu, { kNA, kNA, kNA, kNA, kNA, 114, 100, 88, 85, kNA, kNA } }, // Plutonium
{ Am, { kNA, kNA, kNA, kNA, 140, 111.5, 99, kNA, kNA, kNA, kNA } }, // Americium
{ Cm, { kNA, kNA, kNA, kNA, kNA, 111, 99, kNA, kNA, kNA, kNA } }, // Curium
{ Bk, { kNA, kNA, kNA, kNA, kNA, 110, 97, kNA, kNA, kNA, kNA } }, // Berkelium
{ Cf, { kNA, kNA, kNA, kNA, kNA, 109, 96.1, kNA, kNA, kNA, kNA } }, // Californium
{ Es, { kNA, kNA, kNA, kNA, kNA, 92.8, kNA, kNA, kNA, kNA, kNA } }, // Einsteinium
}, kEffectiveIonicRadii[] = {
{ H, { kNA, kNA, 139.9, -18, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Hydrogen
{ Li, { kNA, kNA, kNA, 76, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Lithium
{ Be, { kNA, kNA, kNA, kNA, 45, kNA, kNA, kNA, kNA, kNA, kNA } }, // Beryllium
{ B, { kNA, kNA, kNA, kNA, kNA, 27, kNA, kNA, kNA, kNA, kNA } }, // Boron
{ C, { kNA, kNA, kNA, kNA, kNA, kNA, 16, kNA, kNA, kNA, kNA } }, // Carbon
{ N, { 146, kNA, kNA, kNA, kNA, 16, kNA, 13, kNA, kNA, kNA } }, // Nitrogen
{ O, { kNA, 140, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Oxygen
{ F, { kNA, kNA, 133, kNA, kNA, kNA, kNA, kNA, kNA, 8, kNA } }, // Fluorine
{ Na, { kNA, kNA, kNA, 102, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Sodium
{ Mg, { kNA, kNA, kNA, kNA, 72, kNA, kNA, kNA, kNA, kNA, kNA } }, // Magnesium
{ Al, { kNA, kNA, kNA, kNA, kNA, 53.5, kNA, kNA, kNA, kNA, kNA } }, // Aluminium
{ Si, { kNA, kNA, kNA, kNA, kNA, kNA, 40, kNA, kNA, kNA, kNA } }, // Silicon
{ P, { 212, kNA, kNA, kNA, kNA, 44, kNA, 38, kNA, kNA, kNA } }, // Phosphorus
{ S, { kNA, 184, kNA, kNA, kNA, kNA, 37, kNA, 29, kNA, kNA } }, // Sulfur
{ Cl, { kNA, kNA, 181, kNA, kNA, kNA, kNA, 12, kNA, 27, kNA } }, // Chlorine
{ K, { kNA, kNA, kNA, 138, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Potassium
{ Ca, { kNA, kNA, kNA, kNA, 100, kNA, kNA, kNA, kNA, kNA, kNA } }, // Calcium
{ Sc, { kNA, kNA, kNA, kNA, kNA, 74.5, kNA, kNA, kNA, kNA, kNA } }, // Scandium
{ Ti, { kNA, kNA, kNA, kNA, 86, 67, 60.5, kNA, kNA, kNA, kNA } }, // Titanium
{ V, { kNA, kNA, kNA, kNA, 79, 64, 58, 54, kNA, kNA, kNA } }, // Vanadium
{ Cr, { kNA, kNA, kNA, kNA, 73, 61.5, 55, 49, 44, kNA, kNA } }, // Chromium ls
{ Cr, { kNA, kNA, kNA, kNA, 80, kNA, kNA, kNA, kNA, kNA, kNA } }, // Chromium hs
{ Mn, { kNA, kNA, kNA, kNA, 67, 58, 53, 33, 25.5, 46, kNA } }, // Manganese ls
{ Mn, { kNA, kNA, kNA, kNA, 83, 64.5, kNA, kNA, kNA, kNA, kNA } }, // Manganese hs
{ Fe, { kNA, kNA, kNA, kNA, 61, 55, 58.5, kNA, 25, kNA, kNA } }, // Iron ls
{ Fe, { kNA, kNA, kNA, kNA, 78, 64.5, kNA, kNA, kNA, kNA, kNA } }, // Iron hs
{ Co, { kNA, kNA, kNA, kNA, 65, 54.5, kNA, kNA, kNA, kNA, kNA } }, // Cobalt ls
{ Co, { kNA, kNA, kNA, kNA, 74.5, 61, 53, kNA, kNA, kNA, kNA } }, // Cobalt hs
{ Ni, { kNA, kNA, kNA, kNA, 69, 56, 48, kNA, kNA, kNA, kNA } }, // Nickel ls
{ Ni, { kNA, kNA, kNA, kNA, kNA, 60, kNA, kNA, kNA, kNA, kNA } }, // Nickel hs
{ Cu, { kNA, kNA, kNA, 77, 73, 54, kNA, kNA, kNA, kNA, kNA } }, // Copper
{ Zn, { kNA, kNA, kNA, kNA, 74, kNA, kNA, kNA, kNA, kNA, kNA } }, // Zinc
{ Ga, { kNA, kNA, kNA, kNA, kNA, 62, kNA, kNA, kNA, kNA, kNA } }, // Gallium
{ Ge, { kNA, kNA, kNA, kNA, 73, kNA, 53, kNA, kNA, kNA, kNA } }, // Germanium
{ As, { kNA, kNA, kNA, kNA, kNA, 58, kNA, 46, kNA, kNA, kNA } }, // Arsenic
{ Se, { kNA, 198, kNA, kNA, kNA, kNA, 50, kNA, 42, kNA, kNA } }, // Selenium
{ Br, { kNA, kNA, 196, kNA, kNA, 59, kNA, 31, kNA, 39, kNA } }, // Bromine
{ Rb, { kNA, kNA, kNA, 152, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Rubidium
{ Sr, { kNA, kNA, kNA, kNA, 118, kNA, kNA, kNA, kNA, kNA, kNA } }, // Strontium
{ Y, { kNA, kNA, kNA, kNA, kNA, 90, kNA, kNA, kNA, kNA, kNA } }, // Yttrium
{ Zr, { kNA, kNA, kNA, kNA, kNA, kNA, 72, kNA, kNA, kNA, kNA } }, // Zirconium
{ Nb, { kNA, kNA, kNA, kNA, kNA, 72, 68, 64, kNA, kNA, kNA } }, // Niobium
{ Mo, { kNA, kNA, kNA, kNA, kNA, 69, 65, 61, 59, kNA, kNA } }, // Molybdenum
{ Tc, { kNA, kNA, kNA, kNA, kNA, kNA, 64.5, 60, kNA, 56, kNA } }, // Technetium
{ Ru, { kNA, kNA, kNA, kNA, kNA, 68, 62, 56.5, kNA, 38, 36 } }, // Ruthenium
{ Rh, { kNA, kNA, kNA, kNA, kNA, 66.5, 60, 55, kNA, kNA, kNA } }, // Rhodium
{ Pd, { kNA, kNA, kNA, 59, 86, 76, 61.5, kNA, kNA, kNA, kNA } }, // Palladium
{ Ag, { kNA, kNA, kNA, 115, 94, 75, kNA, kNA, kNA, kNA, kNA } }, // Silver
{ Cd, { kNA, kNA, kNA, kNA, 95, kNA, kNA, kNA, kNA, kNA, kNA } }, // Cadmium
{ In, { kNA, kNA, kNA, kNA, kNA, 80, kNA, kNA, kNA, kNA, kNA } }, // Indium
{ Sn, { kNA, kNA, kNA, kNA, 118, kNA, 69, kNA, kNA, kNA, kNA } }, // Tin
{ Sb, { kNA, kNA, kNA, kNA, kNA, 76, kNA, 60, kNA, kNA, kNA } }, // Antimony
{ Te, { kNA, 221, kNA, kNA, kNA, kNA, 97, kNA, 56, kNA, kNA } }, // Tellurium
{ I, { kNA, kNA, 220, kNA, kNA, kNA, kNA, 95, kNA, 53, kNA } }, // Iodine
{ Xe, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 48 } }, // Xenon
{ Cs, { kNA, kNA, kNA, 167, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Caesium
{ Ba, { kNA, kNA, kNA, kNA, 135, kNA, kNA, kNA, kNA, kNA, kNA } }, // Barium
{ La, { kNA, kNA, kNA, kNA, kNA, 103.2, kNA, kNA, kNA, kNA, kNA } }, // Lanthanum
{ Ce, { kNA, kNA, kNA, kNA, kNA, 101, 87, kNA, kNA, kNA, kNA } }, // Cerium
{ Pr, { kNA, kNA, kNA, kNA, kNA, 99, 85, kNA, kNA, kNA, kNA } }, // Praseodymium
{ Nd, { kNA, kNA, kNA, kNA, 129, 98.3, kNA, kNA, kNA, kNA, kNA } }, // Neodymium
{ Pm, { kNA, kNA, kNA, kNA, kNA, 97, kNA, kNA, kNA, kNA, kNA } }, // Promethium
{ Sm, { kNA, kNA, kNA, kNA, 122, 95.8, kNA, kNA, kNA, kNA, kNA } }, // Samarium
{ Eu, { kNA, kNA, kNA, kNA, 117, 94.7, kNA, kNA, kNA, kNA, kNA } }, // Europium
{ Gd, { kNA, kNA, kNA, kNA, kNA, 93.5, kNA, kNA, kNA, kNA, kNA } }, // Gadolinium
{ Tb, { kNA, kNA, kNA, kNA, kNA, 92.3, 76, kNA, kNA, kNA, kNA } }, // Terbium
{ Dy, { kNA, kNA, kNA, kNA, 107, 91.2, kNA, kNA, kNA, kNA, kNA } }, // Dysprosium
{ Ho, { kNA, kNA, kNA, kNA, kNA, 90.1, kNA, kNA, kNA, kNA, kNA } }, // Holmium
{ Er, { kNA, kNA, kNA, kNA, kNA, 89, kNA, kNA, kNA, kNA, kNA } }, // Erbium
{ Tm, { kNA, kNA, kNA, kNA, 103, 88, kNA, kNA, kNA, kNA, kNA } }, // Thulium
{ Yb, { kNA, kNA, kNA, kNA, 102, 86.8, kNA, kNA, kNA, kNA, kNA } }, // Ytterbium
{ Lu, { kNA, kNA, kNA, kNA, kNA, 86.1, kNA, kNA, kNA, kNA, kNA } }, // Lutetium
{ Hf, { kNA, kNA, kNA, kNA, kNA, kNA, 71, kNA, kNA, kNA, kNA } }, // Hafnium
{ Ta, { kNA, kNA, kNA, kNA, kNA, 72, 68, 64, kNA, kNA, kNA } }, // Tantalum
{ W, { kNA, kNA, kNA, kNA, kNA, kNA, 66, 62, 60, kNA, kNA } }, // Tungsten
{ Re, { kNA, kNA, kNA, kNA, kNA, kNA, 63, 58, 55, 53, kNA } }, // Rhenium
{ Os, { kNA, kNA, kNA, kNA, kNA, kNA, 63, 57.5, 54.5, 52.5, 39 } }, // Osmium
{ Ir, { kNA, kNA, kNA, kNA, kNA, 68, 62.5, 57, kNA, kNA, kNA } }, // Iridium
{ Pt, { kNA, kNA, kNA, kNA, 80, kNA, 62.5, 57, kNA, kNA, kNA } }, // Platinum
{ Au, { kNA, kNA, kNA, 137, kNA, 85, kNA, 57, kNA, kNA, kNA } }, // Gold
{ Hg, { kNA, kNA, kNA, 119, 102, kNA, kNA, kNA, kNA, kNA, kNA } }, // Mercury
{ Tl, { kNA, kNA, kNA, 150, kNA, 88.5, kNA, kNA, kNA, kNA, kNA } }, // Thallium
{ Pb, { kNA, kNA, kNA, kNA, 119, kNA, 77.5, kNA, kNA, kNA, kNA } }, // Lead
{ Bi, { kNA, kNA, kNA, kNA, kNA, 103, kNA, 76, kNA, kNA, kNA } }, // Bismuth
{ Po, { kNA, 223, kNA, kNA, kNA, kNA, 94, kNA, 67, kNA, kNA } }, // Polonium
{ At, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 62, kNA } }, // Astatine
{ Fr, { kNA, kNA, kNA, 180, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Francium
{ Ra, { kNA, kNA, kNA, kNA, 148, kNA, kNA, kNA, kNA, kNA, kNA } }, // Radium
{ Ac, { kNA, kNA, kNA, kNA, kNA, 106.5, kNA, kNA, kNA, kNA, kNA } }, // Actinium
{ Th, { kNA, kNA, kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA } }, // Thorium
{ Pa, { kNA, kNA, kNA, kNA, kNA, 104, 90, 78, kNA, kNA, kNA } }, // Protactinium
{ U, { kNA, kNA, kNA, kNA, kNA, 102.5, 89, 76, 73, kNA, kNA } }, // Uranium
{ Np, { kNA, kNA, kNA, kNA, 110, 101, 87, 75, 72, 71, kNA } }, // Neptunium
{ Pu, { kNA, kNA, kNA, kNA, kNA, 100, 86, 74, 71, kNA, kNA } }, // Plutonium
{ Am, { kNA, kNA, kNA, kNA, 126, 97.5, 85, kNA, kNA, kNA, kNA } }, // Americium
{ Cm, { kNA, kNA, kNA, kNA, kNA, 97, 85, kNA, kNA, kNA, kNA } }, // Curium
{ Bk, { kNA, kNA, kNA, kNA, kNA, 96, 83, kNA, kNA, kNA, kNA } }, // Berkelium
{ Cf, { kNA, kNA, kNA, kNA, kNA, 95, 82.1, kNA, kNA, kNA, kNA } }, // Californium
{ Es, { kNA, kNA, kNA, kNA, kNA, 83.5, kNA, kNA, kNA, kNA, kNA } }, // Einsteinium
};
// --------------------------------------------------------------------
// The coefficients from Waasmaier & Kirfel (1995), Acta Cryst. A51, 416-431.
@@ -873,6 +1083,20 @@ auto AtomTypeTraits::wksf(int charge) const -> const SFData&
return sf.sf;
}
if (charge != 0)
{
// Oops, not found. Fall back to zero charge and see if we can use that
if (cif::VERBOSE > 0)
std::cerr << "No scattering factor found for " << name() << " with charge " << charge << " will try to fall back to zero charge..." << std::endl;
for (auto& sf: data::kWKSFData)
{
if (sf.symbol == mInfo->type and sf.charge == 0)
return sf.sf;
}
}
throw std::runtime_error("No scattering factor found for " + name() + std::to_string(charge));
}
@@ -886,5 +1110,45 @@ auto AtomTypeTraits::elsf() const -> const SFData&
throw std::runtime_error("No scattering factor found for " + name());
}
// ionic radii
float AtomTypeTraits::crystal_ionic_radius(int charge) const
{
float result = data::kNA;
if (charge >= -3 and charge <= 8)
{
for (auto &r : data::kCrystalIonicRadii)
{
if (r.type != mInfo->type)
continue;
result = r.radii[charge < 0 ? charge + 3 : charge + 2] / 100.0f;
break;
}
}
return result;
}
float AtomTypeTraits::effective_ionic_radius(int charge) const
{
float result = data::kNA;
if (charge >= -3 and charge <= 8)
{
for (auto &r : data::kEffectiveIonicRadii)
{
if (r.type != mInfo->type)
continue;
result = r.radii[charge < 0 ? charge + 3 : charge + 2] / 100.0f;
break;
}
}
return result;
}
}

View File

@@ -233,7 +233,7 @@ BondMap::BondMap(const Structure &p)
link[b].insert(a);
};
cif::Datablock &db = p.getFile().data();
cif::Datablock &db = p.datablock();
// collect all compounds first
std::set<std::string> compounds;
@@ -255,54 +255,70 @@ BondMap::BondMap(const Structure &p)
cif::Progress progress(compounds.size(), "Creating bond map");
// some helper indices to speed things up a bit
std::map<std::tuple<std::string, int, std::string>, std::string> atomMapByAsymSeqAndAtom;
std::map<std::tuple<std::string, int, std::string, std::string>, std::string> atomMapByAsymSeqAndAtom;
for (auto &a : p.atoms())
{
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID());
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID(), a.authSeqID());
atomMapByAsymSeqAndAtom[key] = a.id();
}
// first link all residues in a polyseq
std::string lastAsymID;
std::string lastAsymID, lastAuthSeqID;
int lastSeqID = 0;
for (auto r : db["pdbx_poly_seq_scheme"])
for (const auto &[asymID, seqID, authSeqID] : db["pdbx_poly_seq_scheme"].rows<std::string,int,std::string>("asym_id", "seq_id", "pdb_seq_num"))
{
std::string asymID;
int seqID;
cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");
if (asymID != lastAsymID) // first in a new sequece
{
lastAsymID = asymID;
lastSeqID = seqID;
lastAuthSeqID = authSeqID;
continue;
}
auto c = atomMapByAsymSeqAndAtom[make_tuple(asymID, lastSeqID, "C")];
auto n = atomMapByAsymSeqAndAtom[make_tuple(asymID, seqID, "N")];
auto kc = make_tuple(asymID, lastSeqID, "C", lastAuthSeqID);
auto kn = make_tuple(asymID, seqID, "N", authSeqID);
if (atomMapByAsymSeqAndAtom.count(kc) and atomMapByAsymSeqAndAtom.count(kn))
{
auto c = atomMapByAsymSeqAndAtom.at(kc);
auto n = atomMapByAsymSeqAndAtom.at(kn);
if (not(c.empty() or n.empty()))
bindAtoms(c, n);
}
// if (not(c.empty() or n.empty()))
lastSeqID = seqID;
lastAuthSeqID = authSeqID;
}
for (auto l : db["struct_conn"])
{
std::string asym1, asym2, atomId1, atomId2;
int seqId1 = 0, seqId2 = 0;
cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2) =
std::string authSeqId1, authSeqId2;
cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2, authSeqId1, authSeqId2) =
l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
"ptnr1_label_atom_id", "ptnr2_label_atom_id",
"ptnr1_label_seq_id", "ptnr2_label_seq_id");
"ptnr1_label_seq_id", "ptnr2_label_seq_id",
"ptnr1_auth_seq_id", "ptnr2_auth_seq_id");
std::string a = atomMapByAsymSeqAndAtom[make_tuple(asym1, seqId1, atomId1)];
std::string b = atomMapByAsymSeqAndAtom[make_tuple(asym2, seqId2, atomId2)];
auto ka = make_tuple(asym1, seqId1, atomId1, authSeqId1);
auto kb = make_tuple(asym2, seqId2, atomId2, authSeqId2);
if (atomMapByAsymSeqAndAtom.count(ka) and atomMapByAsymSeqAndAtom.count(kb))
{
auto a = atomMapByAsymSeqAndAtom.at(ka);
auto b = atomMapByAsymSeqAndAtom.at(kb);
if (not(a.empty() or b.empty()))
linkAtoms(a, b);
}
// std::string a = atomMapByAsymSeqAndAtom.at(make_tuple(asym1, seqId1, atomId1, authSeqId1));
// std::string b = atomMapByAsymSeqAndAtom.at(make_tuple(asym2, seqId2, atomId2, authSeqId2));
// if (not(a.empty() or b.empty()))
// linkAtoms(a, b);
}
// then link all atoms in the compounds
@@ -373,15 +389,12 @@ BondMap::BondMap(const Structure &p)
}
// loop over pdbx_branch_scheme
for (auto r : db["pdbx_branch_scheme"].find(cif::Key("mon_id") == c))
for (const auto &[asym_id, pdb_seq_num] : db["pdbx_branch_scheme"].find<std::string,std::string>(cif::Key("mon_id") == c, "asym_id", "pdb_seq_num"))
{
std::string asymID;
cif::tie(asymID) = r.get("asym_id");
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto &a)
{ return a.labelAsymID() == asymID; });
[id = asym_id, nr = pdb_seq_num](const Atom &a)
{ return a.labelAsymID() == id and a.authSeqID() == nr; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{

View File

@@ -34,6 +34,7 @@
#include <stack>
#include <tuple>
#include <unordered_map>
#include <mutex>
#include <filesystem>
@@ -409,6 +410,22 @@ Category &Datablock::operator[](std::string_view name)
return *i;
}
const Category &Datablock::operator[](std::string_view name) const
{
using namespace std::literals;
auto result = get(name);
if (result == nullptr)
// throw std::out_of_range("The category with name " + std::string(name) + " does not exist");
{
std::unique_lock lock(mLock);
if (not mNullCategory)
mNullCategory.reset(new Category(const_cast<Datablock&>(*this), "<null>", nullptr));
result = mNullCategory.get();
}
return *result;
}
Category *Datablock::get(std::string_view name)
{
std::shared_lock lock(mLock);
@@ -613,20 +630,12 @@ bool operator==(const cif::Datablock &dbA, const cif::Datablock &dbB)
int d = nA.compare(nB);
if (d > 0)
{
auto cat = dbB.get(*catB_i);
if (cat == nullptr)
missingA.push_back(*catB_i);
missingA.push_back(*catB_i);
++catB_i;
}
else if (d < 0)
{
auto cat = dbA.get(*catA_i);
if (cat == nullptr)
missingB.push_back(*catA_i);
missingB.push_back(*catA_i);
++catA_i;
}
else
@@ -1533,6 +1542,11 @@ void Category::drop(const std::string &field)
}
}
const Row Category::operator[](Condition &&cond) const
{
return const_cast<Category*>(this)->operator[](std::forward<Condition>(cond));
}
Row Category::operator[](Condition &&cond)
{
Row result;
@@ -1908,11 +1922,16 @@ bool Category::hasParent(Row r, const Category &parentCat, const ValidateLink &l
if (mCatValidator->mMandatoryFields.count(name) and field.is_null())
cond = std::move(cond) and (Key(link.mParentKeys[ix]) == Empty());
}
else
else if (parentCat.mCatValidator->mMandatoryFields.count(link.mParentKeys[ix]))
{
const char *value = field.c_str();
cond = std::move(cond) and (Key(link.mParentKeys[ix]) == value);
}
else
{
const char *value = field.c_str();
cond = std::move(cond) and (Key(link.mParentKeys[ix]) == value or Key(link.mParentKeys[ix]) == Empty());
}
}
if (result and not cond.empty())
@@ -2209,9 +2228,27 @@ void Category::validateLinks() const
size_t missing = 0;
for (auto r : *this)
if (not hasParent(r, *parentCat, *link))
{
if (cif::VERBOSE > 1)
{
if (missing == 0)
{
std::cerr << "Links for " << link->mLinkGroupLabel << " are incomplete" << std::endl
<< " These are the items in " << mName << " that don't have matching parent items in " << parentCat->mName << std::endl
<< std::endl;
}
for (auto k : link->mChildKeys)
std::cerr << k << ": " << r[k].as<std::string>() << std::endl;
std::cerr << std::endl;
}
++missing;
if (missing and VERBOSE >= 0)
}
if (missing and VERBOSE == 1)
{
std::cerr << "Links for " << link->mLinkGroupLabel << " are incomplete" << std::endl
<< " There are " << missing << " items in " << mName << " that don't have matching parent items in " << parentCat->mName << std::endl;
@@ -2408,7 +2445,7 @@ namespace detail
size_t writeValue(std::ostream &os, std::string value, size_t offset, size_t width)
{
if (value.find('\n') != std::string::npos or width == 0 or value.length() >= 132) // write as text field
if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
{
ba::replace_all(value, "\n;", "\n\\;");
@@ -2511,7 +2548,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in
if (not isUnquotedString(v->mText))
l += 2;
if (l >= 132)
if (l > 132)
continue;
if (columnWidths[v->mColumnIndex] < l + 1)
@@ -2547,7 +2584,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in
if (l < w)
l = w;
if (offset + l >= 132 and offset > 0)
if (offset + l > 132 and offset > 0)
{
os << std::endl;
offset = 0;
@@ -2555,7 +2592,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in
offset = detail::writeValue(os, s, offset, w);
if (offset >= 132)
if (offset > 132)
{
os << std::endl;
offset = 0;
@@ -3367,6 +3404,11 @@ File::File(const std::filesystem::path &path, bool validate)
}
}
File::File(const char *data, std::size_t length)
{
load(data, length);
}
File::File(File &&rhs)
: mHead(nullptr)
, mValidator(nullptr)
@@ -3483,6 +3525,25 @@ void File::load(std::istream &is, const std::string &datablock)
}
}
void File::load(const char *data, std::size_t length)
{
bool gzipped = length > 2 and data[0] == static_cast<char>(0x1f) and data[1] == static_cast<char>(0x8b);
struct membuf : public std::streambuf
{
membuf(char *data, size_t length) { this->setg(data, data, data + length); }
} buffer(const_cast<char *>(data), length);
std::istream is(&buffer);
io::filtering_stream<io::input> in;
if (gzipped)
in.push(io::gzip_decompressor());
in.push(is);
load(is);
}
void File::save(std::ostream &os)
{
Datablock *e = mHead;
@@ -3523,6 +3584,21 @@ Datablock &File::operator[](std::string_view name)
return *result;
}
Datablock &File::front()
{
assert(mHead);
return *mHead;
}
Datablock &File::back()
{
assert(mHead);
auto *block = mHead;
while (block->mNext != nullptr)
block = block->mNext;
return *block;
}
bool File::isValid()
{
if (mValidator == nullptr)

View File

@@ -146,7 +146,7 @@ std::string cif2pdbSymmetry(std::string s)
return s;
}
std::string cif2pdbAtomName(std::string name, std::string resName, Datablock& db)
std::string cif2pdbAtomName(std::string name, std::string resName, const Datablock& db)
{
if (name.length() < 4)
{
@@ -166,7 +166,7 @@ std::string cif2pdbAtomName(std::string name, std::string resName, Datablock& db
enum SoftwareType { eRefinement, eDataScaling, eDataExtraction, eDataReduction, ePhasing };
std::string cifSoftware(Datablock& db, SoftwareType sw)
std::string cifSoftware(const Datablock& db, SoftwareType sw)
{
std::string result = "NULL";
@@ -210,7 +210,7 @@ std::string cifSoftware(Datablock& db, SoftwareType sw)
}
// Map asym ID's back to PDB Chain ID's
std::vector<std::string> MapAsymIDs2ChainIDs(const std::vector<std::string>& asymIDs, Datablock& db)
std::vector<std::string> MapAsymIDs2ChainIDs(const std::vector<std::string>& asymIDs, const Datablock& db)
{
std::set<std::string> result;
@@ -275,7 +275,7 @@ size_t WriteOneContinuedLine(std::ostream& pdbFile, std::string header, int cLen
return WriteContinuedLine(pdbFile, header, count, cLen, line, lStart);
}
size_t WriteCitation(std::ostream& pdbFile, Datablock& db, Row r, int reference)
size_t WriteCitation(std::ostream& pdbFile, const Datablock& db, Row r, int reference)
{
size_t result = 0;
@@ -361,7 +361,7 @@ size_t WriteCitation(std::ostream& pdbFile, Datablock& db, Row r, int reference)
return result;
}
void WriteHeaderLines(std::ostream& pdbFile, Datablock& db)
void WriteHeaderLines(std::ostream& pdbFile, const Datablock& db)
{
// 0 1 2 3 4 5 6 7 8
// HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
@@ -592,7 +592,7 @@ void WriteHeaderLines(std::ostream& pdbFile, Datablock& db)
WriteOneContinuedLine(pdbFile, "AUTHOR ", 2, ba::join(authors, ","));
}
void WriteTitle(std::ostream& pdbFile, Datablock& db)
void WriteTitle(std::ostream& pdbFile, const Datablock& db)
{
WriteHeaderLines(pdbFile, db);
@@ -644,7 +644,7 @@ void WriteTitle(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark1(std::ostream& pdbFile, Datablock& db)
void WriteRemark1(std::ostream& pdbFile, const Datablock& db)
{
int reference = 0;
@@ -662,7 +662,7 @@ void WriteRemark1(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark2(std::ostream& pdbFile, Datablock& db)
void WriteRemark2(std::ostream& pdbFile, const Datablock& db)
{
auto& refine = db["refine"];
if (refine.empty())
@@ -698,7 +698,7 @@ class FBase
protected:
FBase(Row r, const char* f)
: mRow(r), mField(f) {}
FBase(Category& cat, cif::Condition&& cond, const char* f)
FBase(const Category& cat, cif::Condition&& cond, const char* f)
: mField(f)
{
auto r = cat.find(std::move(cond));
@@ -714,7 +714,7 @@ class Fi : public FBase
{
public:
Fi(Row r, const char* f) : FBase(r, f) {}
Fi(Category& cat, cif::Condition&& cond, const char* f) : FBase(cat, std::move(cond), f) {}
Fi(const Category& cat, cif::Condition&& cond, const char* f) : FBase(cat, std::move(cond), f) {}
virtual void out(std::ostream& os)
{
@@ -734,7 +734,7 @@ class Ff : public FBase
{
public:
Ff(Row r, const char* f) : FBase(r, f) {}
Ff(Category& cat, cif::Condition&& cond, const char* f) : FBase(cat, std::move(cond), f) {}
Ff(const Category& cat, cif::Condition&& cond, const char* f) : FBase(cat, std::move(cond), f) {}
virtual void out(std::ostream& os)
{
@@ -765,7 +765,7 @@ class Fs : public FBase
{
public:
Fs(Row r, const char* f, int remarkNr = 3) : FBase(r, f), mNr(remarkNr) {}
Fs(Category& cat, cif::Condition&& cond, const char* f, int remarkNr = 3) : FBase(cat, std::move(cond), f), mNr(remarkNr) {}
Fs(const Category& cat, cif::Condition&& cond, const char* f, int remarkNr = 3) : FBase(cat, std::move(cond), f), mNr(remarkNr) {}
virtual void out(std::ostream& os)
{
@@ -831,7 +831,7 @@ std::ostream& operator<<(std::ostream& os, SEP&& sep)
// --------------------------------------------------------------------
void WriteRemark3BusterTNT(std::ostream& pdbFile, Datablock& db)
void WriteRemark3BusterTNT(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
@@ -1009,7 +1009,7 @@ void WriteRemark3BusterTNT(std::ostream& pdbFile, Datablock& db)
// --------------------------------------------------------------------
void WriteRemark3CNS(std::ostream& pdbFile, Datablock& db)
void WriteRemark3CNS(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
@@ -1149,7 +1149,7 @@ void WriteRemark3CNS(std::ostream& pdbFile, Datablock& db)
// --------------------------------------------------------------------
void WriteRemark3Refmac(std::ostream& pdbFile, Datablock& db)
void WriteRemark3Refmac(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
@@ -1476,7 +1476,7 @@ void WriteRemark3Refmac(std::ostream& pdbFile, Datablock& db)
<< RM3("") << std::endl;
}
void WriteRemark3Shelxl(std::ostream& pdbFile, Datablock& db)
void WriteRemark3Shelxl(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
// auto ls_shell = db["refine_ls_shell"].front();
@@ -1555,7 +1555,7 @@ void WriteRemark3Shelxl(std::ostream& pdbFile, Datablock& db)
<< RM3("") << std::endl;
}
void WriteRemark3Phenix(std::ostream& pdbFile, Datablock& db)
void WriteRemark3Phenix(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
// auto ls_shell = db["refine_ls_shell"].front();
@@ -1782,7 +1782,7 @@ void WriteRemark3Phenix(std::ostream& pdbFile, Datablock& db)
pdbFile << RM3("") << std::endl;
}
void WriteRemark3XPlor(std::ostream& pdbFile, Datablock& db)
void WriteRemark3XPlor(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
@@ -1897,7 +1897,7 @@ void WriteRemark3XPlor(std::ostream& pdbFile, Datablock& db)
<< RM3("") << std::endl;
}
void WriteRemark3NuclSQ(std::ostream& pdbFile, Datablock& db)
void WriteRemark3NuclSQ(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
auto pdbx_refine = db["pdbx_refine"].front();
@@ -2001,7 +2001,7 @@ void WriteRemark3NuclSQ(std::ostream& pdbFile, Datablock& db)
<< RM3("") << std::endl;
}
void WriteRemark3ProlSQ(std::ostream& pdbFile, Datablock& db)
void WriteRemark3ProlSQ(std::ostream& pdbFile, const Datablock& db)
{
auto refine = db["refine"].front();
auto pdbx_refine = db["pdbx_refine"].front();
@@ -2120,7 +2120,7 @@ void WriteRemark3ProlSQ(std::ostream& pdbFile, Datablock& db)
<< RM3("") << std::endl;
}
void WriteRemark3(std::ostream& pdbFile, Datablock& db)
void WriteRemark3(std::ostream& pdbFile, const Datablock& db)
{
std::string program, authors;
@@ -2200,7 +2200,7 @@ void WriteRemark3(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark200(std::ostream& pdbFile, Datablock& db)
void WriteRemark200(std::ostream& pdbFile, const Datablock& db)
{
typedef RM<200> RM;
@@ -2335,7 +2335,7 @@ void WriteRemark200(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark280(std::ostream& pdbFile, Datablock& db)
void WriteRemark280(std::ostream& pdbFile, const Datablock& db)
{
typedef RM<280> RM;
@@ -2397,7 +2397,7 @@ void WriteRemark280(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark350(std::ostream& pdbFile, Datablock& db)
void WriteRemark350(std::ostream& pdbFile, const Datablock& db)
{
auto& c1 = db["pdbx_struct_assembly"];
if (c1.empty())
@@ -2517,7 +2517,7 @@ void WriteRemark350(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark400(std::ostream& pdbFile, Datablock& db)
void WriteRemark400(std::ostream& pdbFile, const Datablock& db)
{
for (auto& r: db["pdbx_entry_details"])
{
@@ -2527,7 +2527,7 @@ void WriteRemark400(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark450(std::ostream& pdbFile, Datablock& db)
void WriteRemark450(std::ostream& pdbFile, const Datablock& db)
{
for (auto& r: db["pdbx_entry_details"])
{
@@ -2538,7 +2538,7 @@ void WriteRemark450(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark465(std::ostream& pdbFile, Datablock& db)
void WriteRemark465(std::ostream& pdbFile, const Datablock& db)
{
bool first = true;
typedef RM<465> RM;
@@ -2587,7 +2587,7 @@ void WriteRemark465(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark470(std::ostream& pdbFile, Datablock& db)
void WriteRemark470(std::ostream& pdbFile, const Datablock& db)
{
typedef RM<470> RM;
boost::format fmt("REMARK 470 %3.3s %3.3s %1.1s%4.4d%1.1s ");
@@ -2646,12 +2646,12 @@ void WriteRemark470(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark610(std::ostream& pdbFile, Datablock& db)
void WriteRemark610(std::ostream& pdbFile, const Datablock& db)
{
// #warning("unimplemented!");
}
void WriteRemark800(std::ostream& pdbFile, Datablock& db)
void WriteRemark800(std::ostream& pdbFile, const Datablock& db)
{
int nr = 0;
for (auto r: db["struct_site"])
@@ -2676,7 +2676,7 @@ void WriteRemark800(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemark999(std::ostream& pdbFile, Datablock& db)
void WriteRemark999(std::ostream& pdbFile, const Datablock& db)
{
for (auto& r: db["pdbx_entry_details"])
{
@@ -2687,7 +2687,7 @@ void WriteRemark999(std::ostream& pdbFile, Datablock& db)
}
}
void WriteRemarks(std::ostream& pdbFile, Datablock& db)
void WriteRemarks(std::ostream& pdbFile, const Datablock& db)
{
WriteRemark1(pdbFile, db);
WriteRemark2(pdbFile, db);
@@ -2709,7 +2709,7 @@ void WriteRemarks(std::ostream& pdbFile, Datablock& db)
WriteRemark999(pdbFile, db);
}
int WritePrimaryStructure(std::ostream& pdbFile, Datablock& db)
int WritePrimaryStructure(std::ostream& pdbFile, const Datablock& db)
{
int numSeq = 0;
@@ -2863,7 +2863,7 @@ int WritePrimaryStructure(std::ostream& pdbFile, Datablock& db)
return numSeq;
}
int WriteHeterogen(std::ostream& pdbFile, Datablock& db)
int WriteHeterogen(std::ostream& pdbFile, const Datablock& db)
{
int numHet = 0;
@@ -3132,7 +3132,7 @@ int WriteHeterogen(std::ostream& pdbFile, Datablock& db)
return numHet;
}
std::tuple<int,int> WriteSecondaryStructure(std::ostream& pdbFile, Datablock& db)
std::tuple<int,int> WriteSecondaryStructure(std::ostream& pdbFile, const Datablock& db)
{
int numHelix = 0, numSheet = 0;
@@ -3290,7 +3290,7 @@ std::tuple<int,int> WriteSecondaryStructure(std::ostream& pdbFile, Datablock& db
return std::make_tuple(numHelix, numSheet);
}
void WriteConnectivity(std::ostream& pdbFile, cif::Datablock& db)
void WriteConnectivity(std::ostream& pdbFile, const cif::Datablock& db)
{
// SSBOND
// have to filter out alts
@@ -3406,7 +3406,7 @@ void WriteConnectivity(std::ostream& pdbFile, cif::Datablock& db)
}
}
int WriteMiscellaneousFeatures(std::ostream& pdbFile, Datablock& db)
int WriteMiscellaneousFeatures(std::ostream& pdbFile, const Datablock& db)
{
int numSite = 0;
@@ -3459,7 +3459,7 @@ int WriteMiscellaneousFeatures(std::ostream& pdbFile, Datablock& db)
return numSite;
}
void WriteCrystallographic(std::ostream& pdbFile, Datablock& db)
void WriteCrystallographic(std::ostream& pdbFile, const Datablock& db)
{
auto r = db["symmetry"][cif::Key("entry_id") == db.getName()];
std::string symmetry = r["space_group_name_H-M"].as<std::string>();
@@ -3479,7 +3479,7 @@ void WriteCrystallographic(std::ostream& pdbFile, Datablock& db)
% r["Z_PDB"].as<double>()) << std::endl;
}
int WriteCoordinateTransformation(std::ostream& pdbFile, Datablock& db)
int WriteCoordinateTransformation(std::ostream& pdbFile, const Datablock& db)
{
int result = 0;
@@ -3520,7 +3520,7 @@ int WriteCoordinateTransformation(std::ostream& pdbFile, Datablock& db)
return result;
}
std::tuple<int,int> WriteCoordinatesForModel(std::ostream& pdbFile, Datablock& db,
std::tuple<int,int> WriteCoordinatesForModel(std::ostream& pdbFile, const Datablock& db,
const std::map<std::string,std::tuple<std::string,int,std::string>>& last_resseq_for_chain_map,
std::set<std::string>& TERminatedChains, int model_nr)
{
@@ -3658,7 +3658,7 @@ std::tuple<int,int> WriteCoordinatesForModel(std::ostream& pdbFile, Datablock& d
return std::make_tuple(numCoord, numTer);
}
std::tuple<int,int> WriteCoordinate(std::ostream& pdbFile, Datablock& db)
std::tuple<int,int> WriteCoordinate(std::ostream& pdbFile, const Datablock& db)
{
// residues known from seqres
// map<tuple<std::string,int,std::string>,std::string> res2chain_map;
@@ -3718,7 +3718,7 @@ std::tuple<int,int> WriteCoordinate(std::ostream& pdbFile, Datablock& db)
return result;
}
void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile)
void WritePDBFile(std::ostream& pdbFile, const Datablock &db)
{
io::filtering_ostream out;
out.push(FillOutLineFilter());
@@ -3726,8 +3726,6 @@ void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile)
auto filter = out.component<FillOutLineFilter>(0);
assert(filter);
auto& db = cifFile.firstDatablock();
int numRemark = 0, numHet = 0, numHelix = 0, numSheet = 0, numTurn = 0, numSite = 0, numXform = 0, numCoord = 0, numTer = 0, numConect = 0, numSeq = 0;
@@ -3753,7 +3751,7 @@ void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile)
<< "END" << std::endl;
}
void WritePDBHeaderLines(std::ostream& os, cif::File& cifFile)
void WritePDBHeaderLines(std::ostream& os, const Datablock &db)
{
// io::filtering_ostream out;
// out.push(FillOutLineFilter());
@@ -3762,8 +3760,6 @@ void WritePDBHeaderLines(std::ostream& os, cif::File& cifFile)
// auto filter = out.component<FillOutLineFilter>(0);
// assert(filter);
auto& db = cifFile.firstDatablock();
WriteHeaderLines(os, db);
}
@@ -3779,10 +3775,8 @@ std::string FixStringLength(const std::string& s, std::string::size_type l)
return result;
}
std::string GetPDBHEADERLine(cif::File& cifFile, std::string::size_type truncate_at)
std::string GetPDBHEADERLine(const Datablock &db, std::string::size_type truncate_at)
{
auto& db = cifFile.firstDatablock();
// 0 1 2 3 4 5 6 7 8
// HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
const char kHeader[] =
@@ -3825,10 +3819,8 @@ std::string GetPDBHEADERLine(cif::File& cifFile, std::string::size_type truncate
return FixStringLength((boost::format(kHeader) % keywords % date % db.getName()).str(), truncate_at);
}
std::string GetPDBCOMPNDLine(cif::File& cifFile, std::string::size_type truncate_at)
std::string GetPDBCOMPNDLine(const Datablock &db, std::string::size_type truncate_at)
{
auto& db = cifFile.firstDatablock();
// COMPND
using namespace std::placeholders;
@@ -3886,10 +3878,8 @@ std::string GetPDBCOMPNDLine(cif::File& cifFile, std::string::size_type truncate
return FixStringLength("COMPND " + ba::join(cmpnd, "; "), truncate_at);
}
std::string GetPDBSOURCELine(cif::File& cifFile, std::string::size_type truncate_at)
std::string GetPDBSOURCELine(const Datablock &db, std::string::size_type truncate_at)
{
auto& db = cifFile.firstDatablock();
// SOURCE
int molID = 0;
@@ -3971,10 +3961,8 @@ std::string GetPDBSOURCELine(cif::File& cifFile, std::string::size_type truncate
return FixStringLength("SOURCE " + ba::join(source, "; "), truncate_at);
}
std::string GetPDBAUTHORLine(cif::File& cifFile, std::string::size_type truncate_at)
std::string GetPDBAUTHORLine(const Datablock &db, std::string::size_type truncate_at)
{
auto& db = cifFile.firstDatablock();
// AUTHOR
std::vector<std::string> author;
for (auto r: db["audit_author"])

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -42,7 +42,7 @@ namespace cif
const uint32_t kMaxLineLength = 132;
const uint8_t kCharTraitsTable[128] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
@@ -82,6 +82,29 @@ const char *SacParser::kValueName[] = {
// --------------------------------------------------------------------
bool isUnquotedString(const char *s)
{
auto ss = s;
bool result = isOrdinary(*s++);
while (result and *s != 0)
{
result = isNonBlank(*s);
++s;
}
// but be careful it does not contain e.g. stop_
if (result)
{
static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase);
result = not std::regex_match(ss, reservedRx);
}
return result;
}
// --------------------------------------------------------------------
SacParser::SacParser(std::istream &is, bool init)
: mData(is)
{
@@ -151,23 +174,26 @@ void SacParser::retract()
mTokenValue.pop_back();
}
void SacParser::restart()
int SacParser::restart(int start)
{
int result = 0;
while (not mTokenValue.empty())
retract();
switch (mStart)
switch (start)
{
case eStateStart:
mState = mStart = eStateFloat;
result = eStateFloat;
break;
case eStateFloat:
mState = mStart = eStateInt;
result = eStateInt;
break;
case eStateInt:
mState = mStart = eStateValue;
result = eStateValue;
break;
default:
@@ -175,6 +201,8 @@ void SacParser::restart()
}
mBol = false;
return result;
}
void SacParser::match(SacParser::CIFToken t)
@@ -191,7 +219,7 @@ SacParser::CIFToken SacParser::getNextToken()
CIFToken result = eCIFTokenUnknown;
int quoteChar = 0;
mState = mStart = eStateStart;
int state = eStateStart, start = eStateStart;
mBol = false;
mTokenValue.clear();
@@ -201,7 +229,7 @@ SacParser::CIFToken SacParser::getNextToken()
{
auto ch = getNextChar();
switch (mState)
switch (state)
{
case eStateStart:
if (ch == kEOF)
@@ -209,27 +237,23 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch == '\n')
{
mBol = true;
mState = eStateWhite;
state = eStateWhite;
}
else if (ch == ' ' or ch == '\t')
mState = eStateWhite;
state = eStateWhite;
else if (ch == '#')
mState = eStateComment;
else if (ch == '.')
mState = eStateDot;
state = eStateComment;
else if (ch == '_')
mState = eStateTag;
state = eStateTag;
else if (ch == ';' and mBol)
mState = eStateTextField;
state = eStateTextField;
else if (ch == '\'' or ch == '"')
{
quoteChar = ch;
mState = eStateQuotedString;
state = eStateQuotedString;
}
else if (ch == '?')
mState = eStateQuestionMark;
else
restart();
state = start = restart(start);
break;
case eStateWhite:
@@ -237,7 +261,7 @@ SacParser::CIFToken SacParser::getNextToken()
result = eCIFTokenEOF;
else if (not isspace(ch))
{
mState = eStateStart;
state = eStateStart;
retract();
mTokenValue.clear();
}
@@ -248,7 +272,7 @@ SacParser::CIFToken SacParser::getNextToken()
case eStateComment:
if (ch == '\n')
{
mState = eStateStart;
state = eStateStart;
mBol = true;
mTokenValue.clear();
}
@@ -258,44 +282,19 @@ SacParser::CIFToken SacParser::getNextToken()
error("invalid character in comment");
break;
case eStateQuestionMark:
if (isNonBlank(ch))
mState = eStateValue;
else
{
retract();
result = eCIFTokenValue;
mTokenValue.clear();
mTokenType = eCIFValueUnknown;
}
break;
case eStateDot:
if (isdigit(ch))
mState = eStateFloat + 2;
else if (isspace(ch))
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueInapplicable;
}
else
mState = eStateValue;
break;
case eStateTextField:
if (ch == '\n')
mState = eStateTextField + 1;
state = eStateTextField + 1;
else if (ch == kEOF)
error("unterminated textfield");
else if (not isAnyPrint(ch) and cif::VERBOSE >= 0)
else if (not isAnyPrint(ch))
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
break;
case eStateTextField + 1:
if (isTextLead(ch) or ch == ' ' or ch == '\t')
mState = eStateTextField;
state = eStateTextField;
else if (ch == ';')
{
assert(mTokenValue.length() >= 2);
@@ -313,9 +312,10 @@ SacParser::CIFToken SacParser::getNextToken()
if (ch == kEOF)
error("unterminated quoted string");
else if (ch == quoteChar)
mState = eStateQuotedStringQuote;
state = eStateQuotedStringQuote;
else if (not isAnyPrint(ch))
error("invalid character in quoted string");
std::cerr << "invalid character in quoted string '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
// error("invalid character in quoted string");
break;
case eStateQuotedStringQuote:
@@ -331,7 +331,7 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch == quoteChar)
;
else if (isAnyPrint(ch))
mState = eStateQuotedString;
state = eStateQuotedString;
else if (ch == kEOF)
error("unterminated quoted string");
else
@@ -349,12 +349,12 @@ SacParser::CIFToken SacParser::getNextToken()
case eStateFloat:
if (ch == '+' or ch == '-')
{
mState = eStateFloat + 1;
state = eStateFloat + 1;
}
else if (isdigit(ch))
mState = eStateFloat + 1;
state = eStateFloat + 1;
else
restart();
state = start = restart(start);
break;
case eStateFloat + 1:
@@ -362,9 +362,9 @@ SacParser::CIFToken SacParser::getNextToken()
// mState = eStateNumericSuffix;
// else
if (ch == '.')
mState = eStateFloat + 2;
state = eStateFloat + 2;
else if (tolower(ch) == 'e')
mState = eStateFloat + 3;
state = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF)
{
retract();
@@ -372,16 +372,13 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueInt;
}
else
restart();
state = start = restart(start);
break;
// parsed '.'
case eStateFloat + 2:
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
if (tolower(ch) == 'e')
mState = eStateFloat + 3;
state = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF)
{
retract();
@@ -389,30 +386,27 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueFloat;
}
else
restart();
state = start = restart(start);
break;
// parsed 'e'
case eStateFloat + 3:
if (ch == '-' or ch == '+')
mState = eStateFloat + 4;
state = eStateFloat + 4;
else if (isdigit(ch))
mState = eStateFloat + 5;
state = eStateFloat + 5;
else
restart();
state = start = restart(start);
break;
case eStateFloat + 4:
if (isdigit(ch))
mState = eStateFloat + 5;
state = eStateFloat + 5;
else
restart();
state = start = restart(start);
break;
case eStateFloat + 5:
// if (ch == '(')
// mState = eStateNumericSuffix;
// else
if (isWhite(ch) or ch == kEOF)
{
retract();
@@ -420,14 +414,14 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueFloat;
}
else
restart();
state = start = restart(start);
break;
case eStateInt:
if (isdigit(ch) or ch == '+' or ch == '-')
mState = eStateInt + 1;
state = eStateInt + 1;
else
restart();
state = start = restart(start);
break;
case eStateInt + 1:
@@ -438,35 +432,11 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueInt;
}
else
restart();
state = start = restart(start);
break;
// case eStateNumericSuffix:
// if (isdigit(ch))
// mState = eStateNumericSuffix + 1;
// else
// restart();
// break;
//
// case eStateNumericSuffix + 1:
// if (ch == ')')
// {
// result = eCIFTokenValue;
// mTokenType = eCIFValueNumeric;
// }
// else if (not isdigit(ch))
// restart();
// break;
case eStateValue:
if (isNonBlank(ch))
mState = eStateValue + 1;
else
error("invalid character at this position");
break;
case eStateValue + 1:
if (ch == '_') // first _, check for keywords
if (ch == '_')
{
std::string s = toLowerCopy(mTokenValue);
@@ -476,23 +446,40 @@ SacParser::CIFToken SacParser::getNextToken()
result = eCIFTokenSTOP;
else if (s == "loop_")
result = eCIFTokenLOOP;
else if (s == "data_" or s == "save_")
mState = eStateValue + 2;
else if (s == "data_")
{
state = eStateDATA;
continue;
}
else if (s == "save_")
{
state = eStateSAVE;
continue;
}
}
else if (not isNonBlank(ch))
if (result == eCIFTokenUnknown and not isNonBlank(ch))
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueString;
if (mTokenValue == ".")
mTokenType = eCIFValueInapplicable;
else if (mTokenValue == "?")
{
mTokenType = eCIFValueUnknown;
mTokenValue.clear();
}
}
break;
case eStateValue + 2:
case eStateDATA:
case eStateSAVE:
if (not isNonBlank(ch))
{
retract();
if (tolower(mTokenValue[0]) == 'd')
if (state == eStateDATA)
result = eCIFTokenDATA;
else
result = eCIFTokenSAVE;
@@ -521,6 +508,7 @@ SacParser::CIFToken SacParser::getNextToken()
return result;
}
DatablockIndex SacParser::indexDatablocks()
{
DatablockIndex index;

View File

@@ -25,10 +25,7 @@
*/
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
@@ -37,7 +34,6 @@
#include <regex>
#include <sstream>
#include <thread>
#include <tuple>
#if defined(_MSC_VER)
#define TERM_WIDTH 80
@@ -50,6 +46,8 @@
#include "cif++/CifUtils.hpp"
#include "revision.hpp"
namespace ba = boost::algorithm;
namespace fs = std::filesystem;
@@ -64,39 +62,9 @@ extern int VERBOSE;
std::string get_version_nr()
{
const std::regex
rxVersionNr1(R"(build-(\d+)-g[0-9a-f]{7}(-dirty)?)"),
rxVersionNr2(R"(libcifpp-version: (\d+\.\d+\.\d+))");
#include "revision.hpp"
struct membuf : public std::streambuf
{
membuf(char *data, size_t length) { this->setg(data, data, data + length); }
} buffer(const_cast<char *>(kRevision), sizeof(kRevision));
std::istream is(&buffer);
std::string line, result;
while (getline(is, line))
{
std::smatch m;
if (std::regex_match(line, m, rxVersionNr1))
{
result = m[1];
if (m[2].matched)
result += '*';
break;
}
// always the first, replace with more specific if followed by the other info
if (std::regex_match(line, m, rxVersionNr2))
result = m[1];
}
return result;
std::ostringstream s;
write_version_string(s, false);
return s.str();
}
// --------------------------------------------------------------------
@@ -675,7 +643,6 @@ void ProgressImpl::PrintProgress()
msg.append("| ");
// const char kSpinner[] = { '|', '/', '-', '\\' };
const char kSpinner[] = {' ', '.', 'o', 'O', '0', 'O', 'o', '.'};
const size_t kSpinnerCount = sizeof(kSpinner);
@@ -1232,63 +1199,97 @@ namespace cif
// --------------------------------------------------------------------
std::map<std::string,std::filesystem::path> gLocalResources;
std::filesystem::path gDataDir;
void addDataDirectory(std::filesystem::path dataDir)
class ResourcePool
{
if (VERBOSE > 0 and not fs::exists(dataDir))
std::cerr << "The specified data directory " << dataDir << " does not exist" << std::endl;
gDataDir = dataDir;
public:
static ResourcePool &instance()
{
static std::unique_ptr<ResourcePool> sInstance(new ResourcePool);
return *sInstance;
}
void pushDir(fs::path dir)
{
std::error_code ec;
if (fs::exists(dir, ec) and not ec)
mDirs.push_front(dir);
}
void pushDir(const char *path)
{
if (path != nullptr)
pushDir(fs::path(path));
}
void pushAlias(const std::string &name, std::filesystem::path dataFile)
{
std::error_code ec;
if (not fs::exists(dataFile, ec) or ec)
throw std::runtime_error("Attempt to add a file resource for " + name + " that cannot be used (" + dataFile.string() + ") :" + ec.message());
mLocalResources[name] = dataFile;
}
std::unique_ptr<std::istream> load(fs::path name);
private:
ResourcePool();
std::unique_ptr<std::ifstream> open(fs::path &p)
{
std::unique_ptr<std::ifstream> result;
try
{
if (fs::exists(p))
{
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
if (file->is_open())
result.reset(file.release());
}
}
catch (...) {}
return result;
}
std::map<std::string,std::filesystem::path> mLocalResources;
std::deque<fs::path> mDirs;
};
ResourcePool::ResourcePool()
{
#if defined(DATA_DIR)
pushDir(DATA_DIR);
#endif
pushDir(getenv("LIBCIFPP_DATA_DIR"));
auto ccp4 = getenv("CCP4");
if (ccp4 != nullptr)
pushDir(fs::path(ccp4) / "share" / "libcifpp");
#if defined(CACHE_DIR)
pushDir(CACHE_DIR);
#endif
}
void addFileResource(const std::string &name, std::filesystem::path dataFile)
{
if (not fs::exists(dataFile))
throw std::runtime_error("Attempt to add a file resource for " + name + " that does not exist: " + dataFile.string());
gLocalResources[name] = dataFile;
}
std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
{
std::unique_ptr<std::istream> result;
std::error_code ec;
fs::path p = name;
if (gLocalResources.count(name.string()))
{
std::unique_ptr<std::ifstream> file(new std::ifstream(gLocalResources[name.string()], std::ios::binary));
if (file->is_open())
result.reset(file.release());
}
if (mLocalResources.count(name.string()))
result = open(mLocalResources[name.string()]);
if (not result and not fs::exists(p) and not gDataDir.empty())
p = gDataDir / name;
#if defined(CACHE_DIR)
if (not result and not fs::exists(p))
for (auto di = mDirs.begin(); not result and di != mDirs.end(); ++di)
{
auto p2 = fs::path(CACHE_DIR) / p;
if (fs::exists(p2))
swap(p, p2);
}
#endif
#if defined(DATA_DIR)
if (not result and not fs::exists(p))
{
auto p2 = fs::path(DATA_DIR) / p;
if (fs::exists(p2))
swap(p, p2);
}
#endif
if (not result and fs::exists(p))
{
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
if (file->is_open())
result.reset(file.release());
auto p2 = *di / p;
if (fs::exists(p2, ec) and not ec)
result = open(p2);
}
if (not result and gResourceData)
@@ -1298,7 +1299,24 @@ std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
result.reset(new mrsrc::istream(rsrc));
}
return result;
return result;
}
// --------------------------------------------------------------------
void addDataDirectory(std::filesystem::path dataDir)
{
ResourcePool::instance().pushDir(dataDir);
}
void addFileResource(const std::string &name, std::filesystem::path dataFile)
{
ResourcePool::instance().pushAlias(name, dataFile);
}
std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
{
return ResourcePool::instance().load(name);
}
} // namespace cif

View File

@@ -378,9 +378,10 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
// not found, add it
fs::path dict_name(dictionary);
// too bad clang version 10 did not have a constructor for fs::path that accepts a std::string_view
fs::path dict_name(dictionary.data(), dictionary.data() + dictionary.length());
auto data = loadResource(dictionary);
auto data = loadResource(dict_name);
if (not data and dict_name.extension().string() != ".dic")
data = loadResource(dict_name.parent_path() / (dict_name.filename().string() + ".dic"));
@@ -389,20 +390,22 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
mValidators.emplace_back(dictionary, *data);
else
{
std::error_code ec;
// might be a compressed dictionary on disk
fs::path p = dictionary;
fs::path p = dict_name;
if (p.extension() == ".dic")
p = p.parent_path() / (p.filename().string() + ".gz");
else
p = p.parent_path() / (p.filename().string() + ".dic.gz");
#if defined(CACHE_DIR) and defined(DATA_DIR)
if (not fs::exists(p))
if (not fs::exists(p, ec) or ec)
{
for (const char *dir : {CACHE_DIR, DATA_DIR})
{
auto p2 = fs::path(dir) / p;
if (fs::exists(p2))
if (fs::exists(p2, ec) and not ec)
{
swap(p, p2);
break;
@@ -411,7 +414,7 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
}
#endif
if (fs::exists(p))
if (fs::exists(p, ec) and not ec)
{
std::ifstream file(p, std::ios::binary);
if (not file.is_open())

View File

@@ -274,7 +274,7 @@ class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryI
public:
CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next);
CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next);
CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next);
virtual ~CompoundFactoryImpl()
{
@@ -368,7 +368,7 @@ CompoundFactoryImpl::CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> ne
mKnownBases.insert(key);
}
CompoundFactoryImpl::CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next)
CompoundFactoryImpl::CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next)
: mNext(next)
{
cif::File cifFile(file);
@@ -684,7 +684,7 @@ void CompoundFactory::clear()
sInstance.reset();
}
void CompoundFactory::setDefaultDictionary(const std::filesystem::path &inDictFile)
void CompoundFactory::setDefaultDictionary(const fs::path &inDictFile)
{
if (not fs::exists(inDictFile))
throw std::runtime_error("file not found: " + inDictFile.string());
@@ -701,7 +701,7 @@ void CompoundFactory::setDefaultDictionary(const std::filesystem::path &inDictFi
}
}
void CompoundFactory::pushDictionary(const std::filesystem::path &inDictFile)
void CompoundFactory::pushDictionary(const fs::path &inDictFile)
{
if (not fs::exists(inDictFile))
throw std::runtime_error("file not found: " + inDictFile.string());

View File

@@ -657,7 +657,7 @@ class PDBFileParser
int mSeqNum;
char mIcode;
int mDbSeqNum;
int mDbSeqNum = 0;
bool mSeen = false;
std::set<std::string> mAlts;
@@ -941,7 +941,10 @@ class PDBFileParser
std::string pdb2cifDate(std::string s)
{
std::error_code ec;
return pdb2cifDate(s, ec);
auto result = pdb2cifDate(s, ec);
if (ec and cif::VERBOSE > 0)
std::cerr << "Invalid date(" << s << "): " << ec.message() << std::endl;
return result;
}
std::string pdb2cifAuth(std::string author)
@@ -1074,6 +1077,7 @@ class PDBFileParser
std::map<std::string, std::string> mBranch2EntityID;
std::map<std::string, std::string> mAsymID2EntityID;
std::map<std::string, std::string> mMod2parent;
std::set<std::string> mSugarEntities;
};
// --------------------------------------------------------------------
@@ -3773,6 +3777,10 @@ void PDBFileParser::ConstructEntities()
for (auto &chain : mChains)
{
std::string asymID = cif::cifIdForNumber(asymNr++);
if (mMolID2EntityID.count(chain.mMolID) == 0)
continue;
std::string entityID = mMolID2EntityID[chain.mMolID];
mAsymID2EntityID[asymID] = entityID;
@@ -3798,12 +3806,27 @@ void PDBFileParser::ConstructEntities()
for (std::string monID : monIds)
{
std::string authMonID, authSeqNum;
std::string authMonID, authSeqNum, authInsCode;
if (res.mSeen)
{
authMonID = monID;
authSeqNum = std::to_string(res.mSeqNum);
if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{res.mIcode};
}
else
{
authMonID = res.mMonID;
authSeqNum = std::to_string(res.mSeqNum);
if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{res.mIcode} + "A";
else
authInsCode = "A";
}
if (authInsCode.empty())
authInsCode = ".";
cat->emplace({{"asym_id", asymID},
{"entity_id", mMolID2EntityID[chain.mMolID]},
@@ -3815,7 +3838,7 @@ void PDBFileParser::ConstructEntities()
{"pdb_mon_id", authMonID},
{"auth_mon_id", authMonID},
{"pdb_strand_id", std::string{chain.mDbref.chainID}},
{"pdb_ins_code", (res.mIcode == ' ' or res.mIcode == 0) ? "." : std::string{res.mIcode}},
{"pdb_ins_code", authInsCode},
{"hetero", res.mAlts.empty() ? "n" : "y"}});
}
}
@@ -4583,14 +4606,20 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
}
}
mSugarEntities.insert(entityID);
// create an asym for this sugar tree
std::string asymID = cif::cifIdForNumber(asymNr++);
getCategory("struct_asym")->emplace({{"id", asymID},
{"pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N"},
{"pdbx_modified", "N"},
{"entity_id", entityID}});
mAsymID2EntityID[asymID] = entityID;
getCategory("struct_asym")->emplace({
{"id", asymID},
{"pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N"},
{"pdbx_modified", "N"},
{"entity_id", entityID}
});
std::string iCode{si->iCode};
ba::trim(iCode);
@@ -5089,40 +5118,42 @@ void PDBFileParser::ParseConnectivtyAnnotation()
continue;
}
getCategory("struct_conn")->emplace({{"id", type + std::to_string(linkNr)},
{"conn_type_id", type},
getCategory("struct_conn")->emplace({
{"id", type + std::to_string(linkNr)},
{"conn_type_id", type},
// { "ccp4_link_id", ccp4LinkID },
// { "ccp4_link_id", ccp4LinkID },
{"ptnr1_label_asym_id", p1Asym},
{"ptnr1_label_comp_id", vS(18, 20)},
{"ptnr1_label_seq_id", (isResseq1 and p1Seq) ? std::to_string(p1Seq) : "."},
{"ptnr1_label_atom_id", vS(13, 16)},
{"pdbx_ptnr1_label_alt_id", vS(17, 17)},
{"pdbx_ptnr1_PDB_ins_code", vS(27, 27)},
{"pdbx_ptnr1_standard_comp_id", ""},
{"ptnr1_symmetry", sym1},
{"ptnr1_label_asym_id", p1Asym},
{"ptnr1_label_comp_id", vS(18, 20)},
{"ptnr1_label_seq_id", (isResseq1 and p1Seq) ? std::to_string(p1Seq) : "."},
{"ptnr1_label_atom_id", vS(13, 16)},
{"pdbx_ptnr1_label_alt_id", vS(17, 17)},
{"pdbx_ptnr1_PDB_ins_code", vS(27, 27)},
{"pdbx_ptnr1_standard_comp_id", ""},
{"ptnr1_symmetry", sym1},
{"ptnr2_label_asym_id", p2Asym},
{"ptnr2_label_comp_id", vS(48, 50)},
{"ptnr2_label_seq_id", (isResseq2 and p2Seq) ? std::to_string(p2Seq) : "."},
{"ptnr2_label_atom_id", vS(43, 46)},
{"pdbx_ptnr2_label_alt_id", vS(47, 47)},
{"pdbx_ptnr2_PDB_ins_code", vS(57, 57)},
{"ptnr2_label_asym_id", p2Asym},
{"ptnr2_label_comp_id", vS(48, 50)},
{"ptnr2_label_seq_id", (isResseq2 and p2Seq) ? std::to_string(p2Seq) : "."},
{"ptnr2_label_atom_id", vS(43, 46)},
{"pdbx_ptnr2_label_alt_id", vS(47, 47)},
{"pdbx_ptnr2_PDB_ins_code", vS(57, 57)},
{"ptnr1_auth_asym_id", vS(22, 22)},
{"ptnr1_auth_comp_id", vS(18, 20)},
{"ptnr1_auth_seq_id", vI(23, 26)},
{"ptnr2_auth_asym_id", vS(52, 52)},
{"ptnr2_auth_comp_id", vS(48, 50)},
{"ptnr2_auth_seq_id", vI(53, 56)},
{"ptnr1_auth_asym_id", vS(22, 22)},
{"ptnr1_auth_comp_id", vS(18, 20)},
{"ptnr1_auth_seq_id", vI(23, 26)},
{"ptnr2_auth_asym_id", vS(52, 52)},
{"ptnr2_auth_comp_id", vS(48, 50)},
{"ptnr2_auth_seq_id", vI(53, 56)},
// { "ptnr1_auth_atom_id", vS(13, 16) },
// { "ptnr2_auth_atom_id", vS(43, 46) },
// { "ptnr1_auth_atom_id", vS(13, 16) },
// { "ptnr2_auth_atom_id", vS(43, 46) },
{"ptnr2_symmetry", sym2},
{"ptnr2_symmetry", sym2},
{"pdbx_dist_value", distance}});
{"pdbx_dist_value", distance}
});
continue;
}
@@ -5163,24 +5194,26 @@ void PDBFileParser::ParseConnectivtyAnnotation()
std::string iCode1str = iCode1 == ' ' ? std::string() : std::string{iCode1};
std::string iCode2str = iCode2 == ' ' ? std::string() : std::string{iCode2};
getCategory("struct_mon_prot_cis")->emplace({{"pdbx_id", serNum},
{"label_comp_id", pep1},
{"label_seq_id", lResSeq1},
{"label_asym_id", lAsym1},
{"label_alt_id", "."},
{"pdbx_PDB_ins_code", iCode1str},
{"auth_comp_id", pep1},
{"auth_seq_id", seqNum1},
{"auth_asym_id", std::string{chainID1}},
{"pdbx_label_comp_id_2", pep2},
{"pdbx_label_seq_id_2", lResSeq2},
{"pdbx_label_asym_id_2", lAsym2},
{"pdbx_PDB_ins_code_2", iCode2str},
{"pdbx_auth_comp_id_2", pep2},
{"pdbx_auth_seq_id_2", seqNum2},
{"pdbx_auth_asym_id_2", std::string{chainID2}},
{"pdbx_PDB_model_num", modNum},
{"pdbx_omega_angle", measure}});
getCategory("struct_mon_prot_cis")->emplace({
{"pdbx_id", serNum},
{"label_comp_id", pep1},
{"label_seq_id", lResSeq1},
{"label_asym_id", lAsym1},
{"label_alt_id", "."},
{"pdbx_PDB_ins_code", iCode1str},
{"auth_comp_id", pep1},
{"auth_seq_id", seqNum1},
{"auth_asym_id", std::string{chainID1}},
{"pdbx_label_comp_id_2", pep2},
{"pdbx_label_seq_id_2", lResSeq2},
{"pdbx_label_asym_id_2", lAsym2},
{"pdbx_PDB_ins_code_2", iCode2str},
{"pdbx_auth_comp_id_2", pep2},
{"pdbx_auth_seq_id_2", seqNum2},
{"pdbx_auth_asym_id_2", std::string{chainID2}},
{"pdbx_PDB_model_num", modNum},
{"pdbx_omega_angle", measure}
});
continue;
}
@@ -5539,27 +5572,38 @@ void PDBFileParser::ParseCoordinate(int modelNr)
}
}
getCategory("atom_site")->emplace({{"group_PDB", groupPDB},
{"id", mAtomID},
{"type_symbol", element},
{"label_atom_id", name},
{"label_alt_id", altLoc != ' ' ? std::string{altLoc} : "."},
{"label_comp_id", resName},
{"label_asym_id", asymID},
{"label_entity_id", entityID},
{"label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "."},
{"pdbx_PDB_ins_code", iCode == ' ' ? "" : std::string{iCode}},
{"Cartn_x", x},
{"Cartn_y", y},
{"Cartn_z", z},
{"occupancy", occupancy},
{"B_iso_or_equiv", tempFactor},
{"pdbx_formal_charge", charge},
{"auth_seq_id", resSeq},
{"auth_comp_id", resName},
{"auth_asym_id", std::string{chainID}},
{"auth_atom_id", name},
{"pdbx_PDB_model_num", modelNr}});
// if the atom is part of a sugar, we need to replace the auth_seq_id/resSeq
if (mSugarEntities.count(entityID))
{
using namespace cif::literals;
auto &branch_scheme = *getCategory("pdbx_branch_scheme");
resSeq = branch_scheme.find1<int>("asym_id"_key == asymID and "auth_seq_num"_key == resSeq, "pdb_seq_num");
}
getCategory("atom_site")->emplace({
{"group_PDB", groupPDB},
{"id", mAtomID},
{"type_symbol", element},
{"label_atom_id", name},
{"label_alt_id", altLoc != ' ' ? std::string{altLoc} : "."},
{"label_comp_id", resName},
{"label_asym_id", asymID},
{"label_entity_id", entityID},
{"label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "."},
{"pdbx_PDB_ins_code", iCode == ' ' ? "" : std::string{iCode}},
{"Cartn_x", x},
{"Cartn_y", y},
{"Cartn_z", z},
{"occupancy", occupancy},
{"B_iso_or_equiv", tempFactor},
{"pdbx_formal_charge", charge},
{"auth_seq_id", resSeq},
{"auth_comp_id", resName},
{"auth_asym_id", std::string{chainID}},
{"auth_atom_id", name},
{"pdbx_PDB_model_num", modelNr}
});
InsertAtomType(element);

View File

@@ -295,13 +295,27 @@ Quaternion Normalize(Quaternion q)
// --------------------------------------------------------------------
Quaternion ConstructFromAngleAxis(float angle, Point axis)
{
auto q = std::cos((angle * mmcif::kPI / 180) / 2);
auto s = std::sqrt(1 - q * q);
axis.normalize();
return Normalize(Quaternion{
static_cast<float>(q),
static_cast<float>(s * axis.mX),
static_cast<float>(s * axis.mY),
static_cast<float>(s * axis.mZ)});
}
std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q)
{
if (q.R_component_1() > 1)
q = Normalize(q);
// angle:
double angle = 2 * acos(q.R_component_1());
double angle = 2 * std::acos(q.R_component_1());
angle = angle * 180 / kPI;
// axis:

View File

@@ -535,7 +535,7 @@ double CalculateHBondEnergy(Res &inDonor, Res &inAcceptor)
result = kCouplingConstant / distanceHO - kCouplingConstant / distanceHC + kCouplingConstant / distanceNC - kCouplingConstant / distanceNO;
// DSSP compatibility mode:
result = round(result * 1000) / 1000;
result = std::round(result * 1000) / 1000;
if (result < kMinHBondEnergy)
result = kMinHBondEnergy;
@@ -1230,7 +1230,7 @@ DSSPImpl::DSSPImpl(const Structure &s, int min_poly_proline_stretch_length)
void DSSPImpl::calculateSecondaryStructure()
{
auto &db = mStructure.getFile().data();
auto &db = mStructure.datablock();
for (auto r : db["struct_conn"].find(cif::Key("conn_type_id") == "disulf"))
{
std::string asym1, asym2;

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +0,0 @@
const char kRevision[] = R"(
lib@PROJECT_NAME@-version: @PROJECT_VERSION@
@BUILD_VERSION_STRING@
Date: @BUILD_DATE_TIME@
)";

BIN
test/1juh.cif.gz Normal file

Binary file not shown.

View File

@@ -32,12 +32,12 @@ int main(int argc, char* argv[])
mmcif::File f(testdir / ".."/"examples"/"1cbs.cif.gz");
mmcif::Structure structure(f);
auto &res = structure.getResidue("B", "REA");
auto &res = structure.getResidue("B");
structure.changeResidue(res, "RXA", {});
structure.cleanupEmptyCategories();
f.file().save(std::cout);
f.save(std::cout);
}
catch (const std::exception& e)
{

View File

@@ -78,8 +78,8 @@ BOOST_AUTO_TEST_CASE(create_nonpoly_1)
cif::VERBOSE = 1;
mmcif::File file;
file.file().loadDictionary("mmcif_pdbx_v50.dic");
file.createDatablock("TEST"); // create a datablock
file.loadDictionary("mmcif_pdbx_v50.dic");
file.emplace("TEST"); // create a datablock
mmcif::Structure structure(file);
@@ -118,6 +118,17 @@ HETATM C CHD . ? -4.342 36.262 -3.536 1.00 8.00 ?
auto expected = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
@@ -141,10 +152,10 @@ _atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? ? HEM CHA 1
2 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? ? HEM CHB 1
3 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? ? HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? ? HEM CHD 1
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
2 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
3 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
@@ -171,12 +182,12 @@ _struct_asym.details ?
expected.loadDictionary("mmcif_pdbx_v50.dic");
if (not (expected.firstDatablock() == structure.getFile().data()))
if (not (expected.firstDatablock() == structure.datablock()))
{
BOOST_TEST(false);
std::cout << expected.firstDatablock() << std::endl
<< std::endl
<< structure.getFile().data() << std::endl;
<< structure.datablock() << std::endl;
}
}
@@ -199,3 +210,150 @@ _struct_asym.details ?
// }
// }
// }
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(test_atom_id)
{
auto data = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
3 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
2 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;
data.loadDictionary("mmcif_pdbx_v50.dic");
mmcif::Structure s(data);
BOOST_CHECK_EQUAL(s.getAtomByID("1").authAtomID(), "CHA");
BOOST_CHECK_EQUAL(s.getAtomByID("2").authAtomID(), "CHC");
BOOST_CHECK_EQUAL(s.getAtomByID("3").authAtomID(), "CHB");
BOOST_CHECK_EQUAL(s.getAtomByID("4").authAtomID(), "CHD");
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(atom_numbers_1)
{
const std::filesystem::path test1(gTestDir / ".." / "examples" / "1cbs.cif.gz");
mmcif::File file(test1.string());
mmcif::Structure structure(file);
auto &db = file.data();
auto &atoms = structure.atoms();
auto ai = atoms.begin();
for (const auto &[id, label_asym_id, label_seq_id, label_atom_id, auth_seq_id, label_comp_id] :
db["atom_site"].rows<std::string,std::string,int,std::string,std::string,std::string>("id", "label_asym_id", "label_seq_id", "label_atom_id", "auth_seq_id", "label_comp_id"))
{
auto atom = structure.getAtomByID(id);
BOOST_CHECK_EQUAL(atom.labelAsymID(), label_asym_id);
BOOST_CHECK_EQUAL(atom.labelSeqID(), label_seq_id);
BOOST_CHECK_EQUAL(atom.labelAtomID(), label_atom_id);
BOOST_CHECK_EQUAL(atom.authSeqID(), auth_seq_id);
BOOST_CHECK_EQUAL(atom.labelCompID(), label_comp_id);
BOOST_ASSERT(ai != atoms.end());
BOOST_CHECK_EQUAL(ai->id(), id);
++ai;
}
BOOST_ASSERT(ai == atoms.end());
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(test_load_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / ".." / "examples" / "1cbs.cif.gz");
mmcif::File file(example.string());
auto &db = file.data();
mmcif::Structure s(file);
BOOST_CHECK(s.polymers().size() == 1);
auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
for (auto &poly : s.polymers())
{
BOOST_CHECK_EQUAL(poly.size(), pdbx_poly_seq_scheme.find("asym_id"_key == poly.asymID()).size());
}
}
BOOST_AUTO_TEST_CASE(remove_residue_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / ".." / "examples" / "1cbs.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
s.removeResidue(s.getResidue("B"));
BOOST_CHECK_NO_THROW(s.validateAtoms());
}

199
test/sugar-test.cpp Normal file
View File

@@ -0,0 +1,199 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define BOOST_TEST_ALTERNATIVE_INIT_API
#include <boost/test/included/unit_test.hpp>
#include <stdexcept>
#include "cif++/Cif++.hpp"
#include "cif++/Structure.hpp"
#include "cif++/CifValidator.hpp"
// --------------------------------------------------------------------
cif::File operator""_cf(const char* text, size_t length)
{
struct membuf : public std::streambuf
{
membuf(char* text, size_t length)
{
this->setg(text, text, text + length);
}
} buffer(const_cast<char*>(text), length);
std::istream is(&buffer);
return cif::File(is);
}
// --------------------------------------------------------------------
std::filesystem::path gTestDir = std::filesystem::current_path();
bool init_unit_test()
{
cif::VERBOSE = 1;
// not a test, just initialize test dir
if (boost::unit_test::framework::master_test_suite().argc == 2)
gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
// do this now, avoids the need for installing
cif::addFileResource("mmcif_pdbx_v50.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx_v50.dic");
// initialize CCD location
cif::addFileResource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
mmcif::CompoundFactory::instance().pushDictionary(gTestDir / "HEM.cif");
return true;
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(sugar_name_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
auto &db = s.datablock();
auto &entity = db["entity"];
auto &branches = s.branches();
BOOST_CHECK_EQUAL(branches.size(), 4);
for (auto &branch : branches)
{
auto entityID = branch.front().entityID();
auto name = entity.find1<std::string>("id"_key == entityID, "pdbx_description");
BOOST_CHECK_EQUAL(branch.name(), name);
}
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(create_sugar_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
// collect atoms from asym L first
auto &NAG = s.getResidue("L");
auto nagAtoms = NAG.atoms();
std::vector<std::vector<cif::Item>> ai;
auto &db = s.datablock();
auto &as = db["atom_site"];
for (auto r : as.find("label_asym_id"_key == "L"))
ai.emplace_back(r.begin(), r.end());
s.removeResidue(NAG);
auto &branch = s.createBranch(ai);
BOOST_CHECK_EQUAL(branch.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
BOOST_CHECK_EQUAL(branch.size(), 1);
BOOST_CHECK_EQUAL(branch[0].atoms().size(), nagAtoms.size());
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(create_sugar_2)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
// Get branch for H
auto &bH = s.getBranchByAsymID("H");
BOOST_CHECK_EQUAL(bH.size(), 2);
std::vector<std::vector<cif::Item>> ai[2];
auto &db = s.datablock();
auto &as = db["atom_site"];
for (size_t i = 0; i < 2; ++i)
{
for (auto r : as.find("label_asym_id"_key == "H" and "auth_seq_id"_key == i + 1))
ai[i].emplace_back(r.begin(), r.end());
}
s.removeBranch(bH);
auto &bN = s.createBranch(ai[0]);
s.extendBranch(bN.asymID(), ai[1], 1, "O4");
BOOST_CHECK_EQUAL(bN.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose-(1-4)-2-acetamido-2-deoxy-beta-D-glucopyranose");
BOOST_CHECK_EQUAL(bN.size(), 2);
file.save(gTestDir / "test-create_sugar_2.cif");
BOOST_CHECK_NO_THROW(mmcif::Structure s2(file));
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(delete_sugar_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
// Get branch for H
auto &bG = s.getBranchByAsymID("G");
BOOST_CHECK_EQUAL(bG.size(), 4);
s.removeResidue(bG[1]);
BOOST_CHECK_EQUAL(bG.size(), 1);
auto &bN = s.getBranchByAsymID("G");
BOOST_CHECK_EQUAL(bN.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
BOOST_CHECK_EQUAL(bN.size(), 1);
file.save(gTestDir / "test-create_sugar_3.cif");
BOOST_CHECK_NO_THROW(mmcif::Structure s2(file));
}

File diff suppressed because it is too large Load Diff