Compare commits

...

25 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
87486f87ef revert loading compressed dictionaries 2022-07-12 14:20:01 +02:00
Maarten L. Hekkelman
80e7da0f13 Locating dictionaries updated 2022-07-12 14:13:59 +02:00
Maarten L. Hekkelman
3745beae66 Fix search order for resources 2022-07-12 13:55:19 +02:00
Maarten L. Hekkelman
3965840bfa New way of locating resources 2022-07-12 13:49:02 +02:00
Maarten L. Hekkelman
a88c6f3d32 Fix for older clang (on MacOS?). 2022-07-05 11:07:12 +02:00
Maarten L. Hekkelman
ed6c6f0026 Move assignment of Structure is not possible due to reference to datablock 2022-07-05 09:46:59 +02:00
Maarten L. Hekkelman
bdda9d72b5 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-07-05 09:36:36 +02:00
Maarten L. Hekkelman
fd080e778e Changes for MacOS/MSVC 2022-07-05 09:36:26 +02:00
Maarten L. Hekkelman
9f72df2ecd Update LICENSE
Format changed, now recognized
2022-07-01 08:21:22 +02:00
ojcharles
617db012f0 Update Cif++.cpp (#15)
add mutex library for std::unique_lock, not included in std lib
2022-07-01 08:07:24 +02:00
Maarten L. Hekkelman
9d15541237 include cstring for gnu c++ 12 2022-06-22 08:53:05 +02:00
Maarten L. Hekkelman
35c99564c6 Fix importing sugars from PDB files 2022-06-01 15:17:54 +02:00
Maarten L. Hekkelman
1d8fe334d6 Fix writing sugar branches 2022-06-01 13:22:34 +02:00
Maarten L. Hekkelman
d86bb314ac Better handling of missing residues/mismatch seqres 2022-06-01 11:27:41 +02:00
Maarten L. Hekkelman
0ef8eb59f8 Fix scattering factors error 2022-05-18 13:04:42 +02:00
Maarten L. Hekkelman
b5fe4a9a87 locating resources that might be protected 2022-05-18 11:53:13 +02:00
Maarten L. Hekkelman
11fea31b98 more loading resources 2022-05-18 11:37:26 +02:00
Maarten L. Hekkelman
f629275ed5 locating resources that might be protected 2022-05-18 11:25:47 +02:00
Maarten L. Hekkelman
a5f6166469 locating resources that might be protected 2022-05-18 11:14:14 +02:00
Maarten L. Hekkelman
501050e591 Add move constructor to mmcif::Structure 2022-05-10 17:11:04 +02:00
Maarten L. Hekkelman
e1b240b2b2 sugar work 2022-05-04 16:48:28 +02:00
Maarten L. Hekkelman
3d79278ed7 Merge branch 'trunk' into develop 2022-05-04 09:51:15 +02:00
Maarten L. Hekkelman
5e0b197a43 mmcif::Atom::compound() revision 2022-05-04 09:50:24 +02:00
Maarten L. Hekkelman
af721eb196 Make having no compound less fatal 2022-05-02 14:40:22 +02:00
Maarten L. Hekkelman
788e315f5e Fix entity_branch_link entry 2022-05-02 12:24:35 +02:00
15 changed files with 720 additions and 452 deletions

1
.gitignore vendored
View File

@@ -13,3 +13,4 @@ msvc/
Testing/
rsrc/feature-request.txt
test/1cbs.cif
test/test-create_sugar_?.cif

View File

@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16)
# set the project name
project(cifpp VERSION 4.0.1 LANGUAGES CXX)
project(cifpp VERSION 4.2.0 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

View File

@@ -1,6 +1,7 @@
SPDX-License-Identifier: BSD-2-Clause
BSD-2-Clause License
Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -20,4 +21,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -1,3 +1,14 @@
Version 4.2.0
- Yet another rewrite of resource loading
Version 4.1.1
- Fall back to zero charge for scattering factors if the atom
was not found in the table.
- Improve code to locate resources, failing less.
Version 4.1.0
- Some interface changes for mmcif::Atom
Version 4.0.1
- Added a bunch of const methods to Datablock and Category.
- Changed PDB writing interface to accept Datablock instead of File.

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -26,82 +26,82 @@
#pragma once
#include <string>
#include <array>
#include <cstring>
#include <functional>
#include <iomanip>
#include <iostream>
#include <list>
#include <optional>
#include <regex>
#include <set>
#include <sstream>
#include <iomanip>
#include <shared_mutex>
#include <sstream>
#include <string>
#include <boost/format.hpp>
#include "cif++/CifUtils.hpp"
/*
Simple C++ interface to CIF files.
Assumptions: a file contains one or more datablocks modelled by the class datablock.
Each datablock contains categories. These map to the original tables used to fill
the mmCIF file. Each Category can contain multiple Items, the columns in the table.
Values are stored as character strings internally.
Synopsis:
// create a cif file
cif::datablock e("1MVE");
e.append(cif::Category{"_entry", { "id", "1MVE" } });
cif::Category atomSite("atom_site");
size_t nr{};
for (auto& myAtom: atoms)
{
atomSite.push_back({
{ "group_PDB", "ATOM" },
{ "id", ++nr },
{ "type_symbol", myAtom.type.str() },
...
});
}
e.append(move(atomSite));
cif::File f;
f.append(e);
ofstream os("1mve.cif");
f.write(os);
Simple C++ interface to CIF files.
// read
f.read(ifstream{"1mve.cif"});
auto& e = f.firstDatablock();
cout << "ID of datablock: " << e.id() << endl;
auto& atoms = e["atom_site"];
for (auto& atom: atoms)
{
cout << atom["group_PDB"] << ", "
<< atom["id"] << ", "
...
Assumptions: a file contains one or more datablocks modelled by the class datablock.
Each datablock contains categories. These map to the original tables used to fill
the mmCIF file. Each Category can contain multiple Items, the columns in the table.
float x, y, z;
cif::tie(x, y, z) = atom.get("Cartn_x", "Cartn_y", "Cartn_z");
...
}
Values are stored as character strings internally.
Another way of querying a Category is by using this construct:
auto cat& = e["atom_site"];
auto Rows = cat.find(Key("label_asym_id") == "A" and Key("label_seq_id") == 1);
Synopsis:
// create a cif file
cif::datablock e("1MVE");
e.append(cif::Category{"_entry", { "id", "1MVE" } });
cif::Category atomSite("atom_site");
size_t nr{};
for (auto& myAtom: atoms)
{
atomSite.push_back({
{ "group_PDB", "ATOM" },
{ "id", ++nr },
{ "type_symbol", myAtom.type.str() },
...
});
}
e.append(move(atomSite));
cif::File f;
f.append(e);
ofstream os("1mve.cif");
f.write(os);
// read
f.read(ifstream{"1mve.cif"});
auto& e = f.firstDatablock();
cout << "ID of datablock: " << e.id() << endl;
auto& atoms = e["atom_site"];
for (auto& atom: atoms)
{
cout << atom["group_PDB"] << ", "
<< atom["id"] << ", "
...
float x, y, z;
cif::tie(x, y, z) = atom.get("Cartn_x", "Cartn_y", "Cartn_z");
...
}
Another way of querying a Category is by using this construct:
auto cat& = e["atom_site"];
auto Rows = cat.find(Key("label_asym_id") == "A" and Key("label_seq_id") == 1);
*/
@@ -147,12 +147,12 @@ class Item
Item(std::string_view name, char value)
: mName(name)
, mValue({ value })
, mValue({value})
{
}
template<typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
Item(std::string_view name, const T& value, const char *fmt)
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
Item(std::string_view name, const T &value, const char *fmt)
: mName(name)
, mValue((boost::format(fmt) % value).str())
{
@@ -284,10 +284,10 @@ class Datablock
friend bool operator==(const Datablock &lhs, const Datablock &rhs);
friend std::ostream& operator<<(std::ostream &os, const Datablock &data);
friend std::ostream &operator<<(std::ostream &os, const Datablock &data);
private:
CategoryList mCategories; // LRU
CategoryList mCategories; // LRU
mutable std::shared_mutex mLock;
std::string mName;
const Validator *mValidator;
@@ -1636,7 +1636,7 @@ class conditional_iterator_proxy
using iterator = conditional_iterator_impl;
using reference = typename iterator::reference;
template<typename... Ns>
template <typename... Ns>
conditional_iterator_proxy(CategoryType &cat, row_iterator pos, Condition &&cond, Ns... names);
conditional_iterator_proxy(conditional_iterator_proxy &&p);
@@ -1910,7 +1910,7 @@ class Category
conditional_iterator_proxy<const Category> find(const_iterator pos, Condition &&cond) const
{
return conditional_iterator_proxy<const Category>{const_cast<Category&>(*this), pos, std::forward<Condition>(cond)};
return conditional_iterator_proxy<const Category>{const_cast<Category &>(*this), pos, std::forward<Condition>(cond)};
}
template <typename... Ts, typename... Ns>
@@ -1931,14 +1931,14 @@ class Category
conditional_iterator_proxy<Category, Ts...> find(const_iterator pos, Condition &&cond, Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)... };
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)...};
}
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<const Category, Ts...> find(const_iterator pos, Condition &&cond, Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)... };
return {*this, pos, std::forward<Condition>(cond), std::forward<Ns>(names)...};
}
// --------------------------------------------------------------------
@@ -1981,13 +1981,13 @@ class Category
}
template <typename T>
T find1(Condition &&cond, const char* column) const
T find1(Condition &&cond, const char *column) const
{
return find1<T>(cbegin(), std::forward<Condition>(cond), column);
}
template <typename T>
T find1(const_iterator pos, Condition &&cond, const char* column) const
T find1(const_iterator pos, Condition &&cond, const char *column) const
{
auto h = find<T>(pos, std::forward<Condition>(cond), column);
@@ -2237,7 +2237,7 @@ class File
append(new Datablock(name));
return back();
}
Datablock *get(std::string_view name) const;
Datablock &operator[](std::string_view name);

View File

@@ -80,7 +80,7 @@ class Atom
void moveTo(const Point &p);
const Compound &comp() const;
const Compound *compound() const;
const std::string get_property(const std::string_view name) const;
void set_property(const std::string_view name, const std::string &value);
@@ -186,7 +186,7 @@ class Atom
bool isSymmetryCopy() const { return impl().mSymmetryCopy; }
std::string symmetry() const { return impl().mSymmetryOperator; }
const Compound &comp() const { return impl().comp(); }
const Compound &compound() const;
bool isWater() const { return impl().mCompID == "HOH" or impl().mCompID == "H2O" or impl().mCompID == "WAT"; }
int charge() const;
@@ -527,6 +527,9 @@ class Sugar : public Residue
Sugar(const Branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID);
Sugar(Sugar &&rhs);
Sugar &operator=(Sugar &&rhs);
int num() const { return std::stoi(mAuthSeqID); }
std::string name() const;
@@ -534,9 +537,14 @@ class Sugar : public Residue
Atom getLink() const { return mLink; }
void setLink(Atom link) { mLink = link; }
size_t getLinkNr() const
{
return mLink ? std::stoi(mLink.authSeqID()) : 0;
}
private:
const Branch &mBranch;
const Branch *mBranch;
Atom mLink;
};
@@ -623,10 +631,14 @@ class Structure
Structure(cif::Datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
Structure(Structure &&s) = default;
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
Structure(const Structure &);
Structure &operator=(const Structure &) = delete;
// Structure &operator=(Structure &&s) = default;
~Structure();
const AtomView &atoms() const { return mAtoms; }
@@ -710,7 +722,11 @@ class Structure
}
// Actions
void removeAtom(Atom &a);
void removeAtom(Atom &a)
{
removeAtom(a, true);
}
void swapAtoms(Atom a1, Atom a2); // swap the labels for these atoms
void moveAtom(Atom a, Point p); // move atom to a new location
void changeResidue(Residue &res, const std::string &newCompound,
@@ -815,6 +831,9 @@ class Structure
Atom &emplace_atom(Atom &&atom);
void removeAtom(Atom &a, bool removeFromResidue);
void removeSugar(Sugar &sugar);
cif::Datablock &mDb;
size_t mModelNr;
AtomView mAtoms;

View File

@@ -1083,6 +1083,20 @@ auto AtomTypeTraits::wksf(int charge) const -> const SFData&
return sf.sf;
}
if (charge != 0)
{
// Oops, not found. Fall back to zero charge and see if we can use that
if (cif::VERBOSE > 0)
std::cerr << "No scattering factor found for " << name() << " with charge " << charge << " will try to fall back to zero charge..." << std::endl;
for (auto& sf: data::kWKSFData)
{
if (sf.symbol == mInfo->type and sf.charge == 0)
return sf.sf;
}
}
throw std::runtime_error("No scattering factor found for " + name() + std::to_string(charge));
}

View File

@@ -393,8 +393,8 @@ BondMap::BondMap(const Structure &p)
{
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](const Atom &a)
{ return a.labelAsymID() == asym_id and a.authSeqID() == pdb_seq_num; });
[id = asym_id, nr = pdb_seq_num](const Atom &a)
{ return a.labelAsymID() == id and a.authSeqID() == nr; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{

View File

@@ -34,6 +34,7 @@
#include <stack>
#include <tuple>
#include <unordered_map>
#include <mutex>
#include <filesystem>

View File

@@ -1199,76 +1199,97 @@ namespace cif
// --------------------------------------------------------------------
std::map<std::string,std::filesystem::path> gLocalResources;
std::filesystem::path gDataDir;
void addDataDirectory(std::filesystem::path dataDir)
class ResourcePool
{
if (VERBOSE > 0 and not fs::exists(dataDir))
std::cerr << "The specified data directory " << dataDir << " does not exist" << std::endl;
gDataDir = dataDir;
public:
static ResourcePool &instance()
{
static std::unique_ptr<ResourcePool> sInstance(new ResourcePool);
return *sInstance;
}
void pushDir(fs::path dir)
{
std::error_code ec;
if (fs::exists(dir, ec) and not ec)
mDirs.push_front(dir);
}
void pushDir(const char *path)
{
if (path != nullptr)
pushDir(fs::path(path));
}
void pushAlias(const std::string &name, std::filesystem::path dataFile)
{
std::error_code ec;
if (not fs::exists(dataFile, ec) or ec)
throw std::runtime_error("Attempt to add a file resource for " + name + " that cannot be used (" + dataFile.string() + ") :" + ec.message());
mLocalResources[name] = dataFile;
}
std::unique_ptr<std::istream> load(fs::path name);
private:
ResourcePool();
std::unique_ptr<std::ifstream> open(fs::path &p)
{
std::unique_ptr<std::ifstream> result;
try
{
if (fs::exists(p))
{
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
if (file->is_open())
result.reset(file.release());
}
}
catch (...) {}
return result;
}
std::map<std::string,std::filesystem::path> mLocalResources;
std::deque<fs::path> mDirs;
};
ResourcePool::ResourcePool()
{
#if defined(DATA_DIR)
pushDir(DATA_DIR);
#endif
pushDir(getenv("LIBCIFPP_DATA_DIR"));
auto ccp4 = getenv("CCP4");
if (ccp4 != nullptr)
pushDir(fs::path(ccp4) / "share" / "libcifpp");
#if defined(CACHE_DIR)
pushDir(CACHE_DIR);
#endif
}
void addFileResource(const std::string &name, std::filesystem::path dataFile)
{
if (not fs::exists(dataFile))
throw std::runtime_error("Attempt to add a file resource for " + name + " that does not exist: " + dataFile.string());
gLocalResources[name] = dataFile;
}
std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
{
std::unique_ptr<std::istream> result;
std::error_code ec;
fs::path p = name;
if (gLocalResources.count(name.string()))
{
std::unique_ptr<std::ifstream> file(new std::ifstream(gLocalResources[name.string()], std::ios::binary));
if (file->is_open())
result.reset(file.release());
}
if (mLocalResources.count(name.string()))
result = open(mLocalResources[name.string()]);
if (not result and not fs::exists(p) and not gDataDir.empty())
p = gDataDir / name;
#if defined(CACHE_DIR)
if (not result and not fs::exists(p))
for (auto di = mDirs.begin(); not result and di != mDirs.end(); ++di)
{
auto p2 = fs::path(CACHE_DIR) / p;
if (fs::exists(p2))
swap(p, p2);
}
#endif
#if defined(DATA_DIR)
if (not result and not fs::exists(p))
{
auto p2 = fs::path(DATA_DIR) / p;
if (fs::exists(p2))
swap(p, p2);
}
#endif
#if defined(CCP4) and CCP4
if (not result and not fs::exists(p))
{
const char* CCP4_DIR = getenv("CCP4");
if (CCP4_DIR != nullptr and fs::exists(CCP4_DIR))
{
auto p2 = fs::path(DATA_DIR) / p;
if (fs::exists(p2))
swap(p, p2);
}
}
#endif
if (not result and fs::exists(p))
{
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
if (file->is_open())
result.reset(file.release());
auto p2 = *di / p;
if (fs::exists(p2, ec) and not ec)
result = open(p2);
}
if (not result and gResourceData)
@@ -1278,7 +1299,24 @@ std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
result.reset(new mrsrc::istream(rsrc));
}
return result;
return result;
}
// --------------------------------------------------------------------
void addDataDirectory(std::filesystem::path dataDir)
{
ResourcePool::instance().pushDir(dataDir);
}
void addFileResource(const std::string &name, std::filesystem::path dataFile)
{
ResourcePool::instance().pushAlias(name, dataFile);
}
std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
{
return ResourcePool::instance().load(name);
}
} // namespace cif

View File

@@ -378,9 +378,10 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
// not found, add it
fs::path dict_name(dictionary);
// too bad clang version 10 did not have a constructor for fs::path that accepts a std::string_view
fs::path dict_name(dictionary.data(), dictionary.data() + dictionary.length());
auto data = loadResource(dictionary);
auto data = loadResource(dict_name);
if (not data and dict_name.extension().string() != ".dic")
data = loadResource(dict_name.parent_path() / (dict_name.filename().string() + ".dic"));
@@ -389,20 +390,22 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
mValidators.emplace_back(dictionary, *data);
else
{
std::error_code ec;
// might be a compressed dictionary on disk
fs::path p = dictionary;
fs::path p = dict_name;
if (p.extension() == ".dic")
p = p.parent_path() / (p.filename().string() + ".gz");
else
p = p.parent_path() / (p.filename().string() + ".dic.gz");
#if defined(CACHE_DIR) and defined(DATA_DIR)
if (not fs::exists(p))
if (not fs::exists(p, ec) or ec)
{
for (const char *dir : {CACHE_DIR, DATA_DIR})
{
auto p2 = fs::path(dir) / p;
if (fs::exists(p2))
if (fs::exists(p2, ec) and not ec)
{
swap(p, p2);
break;
@@ -411,7 +414,7 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
}
#endif
if (fs::exists(p))
if (fs::exists(p, ec) and not ec)
{
std::ifstream file(p, std::ios::binary);
if (not file.is_open())

View File

@@ -274,7 +274,7 @@ class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryI
public:
CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next);
CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next);
CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next);
virtual ~CompoundFactoryImpl()
{
@@ -368,7 +368,7 @@ CompoundFactoryImpl::CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> ne
mKnownBases.insert(key);
}
CompoundFactoryImpl::CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next)
CompoundFactoryImpl::CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next)
: mNext(next)
{
cif::File cifFile(file);
@@ -684,7 +684,7 @@ void CompoundFactory::clear()
sInstance.reset();
}
void CompoundFactory::setDefaultDictionary(const std::filesystem::path &inDictFile)
void CompoundFactory::setDefaultDictionary(const fs::path &inDictFile)
{
if (not fs::exists(inDictFile))
throw std::runtime_error("file not found: " + inDictFile.string());
@@ -701,7 +701,7 @@ void CompoundFactory::setDefaultDictionary(const std::filesystem::path &inDictFi
}
}
void CompoundFactory::pushDictionary(const std::filesystem::path &inDictFile)
void CompoundFactory::pushDictionary(const fs::path &inDictFile)
{
if (not fs::exists(inDictFile))
throw std::runtime_error("file not found: " + inDictFile.string());

View File

@@ -1077,6 +1077,7 @@ class PDBFileParser
std::map<std::string, std::string> mBranch2EntityID;
std::map<std::string, std::string> mAsymID2EntityID;
std::map<std::string, std::string> mMod2parent;
std::set<std::string> mSugarEntities;
};
// --------------------------------------------------------------------
@@ -3776,6 +3777,10 @@ void PDBFileParser::ConstructEntities()
for (auto &chain : mChains)
{
std::string asymID = cif::cifIdForNumber(asymNr++);
if (mMolID2EntityID.count(chain.mMolID) == 0)
continue;
std::string entityID = mMolID2EntityID[chain.mMolID];
mAsymID2EntityID[asymID] = entityID;
@@ -3801,12 +3806,27 @@ void PDBFileParser::ConstructEntities()
for (std::string monID : monIds)
{
std::string authMonID, authSeqNum;
std::string authMonID, authSeqNum, authInsCode;
if (res.mSeen)
{
authMonID = monID;
authSeqNum = std::to_string(res.mSeqNum);
if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{res.mIcode};
}
else
{
authMonID = res.mMonID;
authSeqNum = std::to_string(res.mSeqNum);
if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{res.mIcode} + "A";
else
authInsCode = "A";
}
if (authInsCode.empty())
authInsCode = ".";
cat->emplace({{"asym_id", asymID},
{"entity_id", mMolID2EntityID[chain.mMolID]},
@@ -3818,7 +3838,7 @@ void PDBFileParser::ConstructEntities()
{"pdb_mon_id", authMonID},
{"auth_mon_id", authMonID},
{"pdb_strand_id", std::string{chain.mDbref.chainID}},
{"pdb_ins_code", (res.mIcode == ' ' or res.mIcode == 0) ? "." : std::string{res.mIcode}},
{"pdb_ins_code", authInsCode},
{"hetero", res.mAlts.empty() ? "n" : "y"}});
}
}
@@ -4586,14 +4606,20 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
}
}
mSugarEntities.insert(entityID);
// create an asym for this sugar tree
std::string asymID = cif::cifIdForNumber(asymNr++);
getCategory("struct_asym")->emplace({{"id", asymID},
{"pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N"},
{"pdbx_modified", "N"},
{"entity_id", entityID}});
mAsymID2EntityID[asymID] = entityID;
getCategory("struct_asym")->emplace({
{"id", asymID},
{"pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N"},
{"pdbx_modified", "N"},
{"entity_id", entityID}
});
std::string iCode{si->iCode};
ba::trim(iCode);
@@ -5092,40 +5118,42 @@ void PDBFileParser::ParseConnectivtyAnnotation()
continue;
}
getCategory("struct_conn")->emplace({{"id", type + std::to_string(linkNr)},
{"conn_type_id", type},
getCategory("struct_conn")->emplace({
{"id", type + std::to_string(linkNr)},
{"conn_type_id", type},
// { "ccp4_link_id", ccp4LinkID },
// { "ccp4_link_id", ccp4LinkID },
{"ptnr1_label_asym_id", p1Asym},
{"ptnr1_label_comp_id", vS(18, 20)},
{"ptnr1_label_seq_id", (isResseq1 and p1Seq) ? std::to_string(p1Seq) : "."},
{"ptnr1_label_atom_id", vS(13, 16)},
{"pdbx_ptnr1_label_alt_id", vS(17, 17)},
{"pdbx_ptnr1_PDB_ins_code", vS(27, 27)},
{"pdbx_ptnr1_standard_comp_id", ""},
{"ptnr1_symmetry", sym1},
{"ptnr1_label_asym_id", p1Asym},
{"ptnr1_label_comp_id", vS(18, 20)},
{"ptnr1_label_seq_id", (isResseq1 and p1Seq) ? std::to_string(p1Seq) : "."},
{"ptnr1_label_atom_id", vS(13, 16)},
{"pdbx_ptnr1_label_alt_id", vS(17, 17)},
{"pdbx_ptnr1_PDB_ins_code", vS(27, 27)},
{"pdbx_ptnr1_standard_comp_id", ""},
{"ptnr1_symmetry", sym1},
{"ptnr2_label_asym_id", p2Asym},
{"ptnr2_label_comp_id", vS(48, 50)},
{"ptnr2_label_seq_id", (isResseq2 and p2Seq) ? std::to_string(p2Seq) : "."},
{"ptnr2_label_atom_id", vS(43, 46)},
{"pdbx_ptnr2_label_alt_id", vS(47, 47)},
{"pdbx_ptnr2_PDB_ins_code", vS(57, 57)},
{"ptnr2_label_asym_id", p2Asym},
{"ptnr2_label_comp_id", vS(48, 50)},
{"ptnr2_label_seq_id", (isResseq2 and p2Seq) ? std::to_string(p2Seq) : "."},
{"ptnr2_label_atom_id", vS(43, 46)},
{"pdbx_ptnr2_label_alt_id", vS(47, 47)},
{"pdbx_ptnr2_PDB_ins_code", vS(57, 57)},
{"ptnr1_auth_asym_id", vS(22, 22)},
{"ptnr1_auth_comp_id", vS(18, 20)},
{"ptnr1_auth_seq_id", vI(23, 26)},
{"ptnr2_auth_asym_id", vS(52, 52)},
{"ptnr2_auth_comp_id", vS(48, 50)},
{"ptnr2_auth_seq_id", vI(53, 56)},
{"ptnr1_auth_asym_id", vS(22, 22)},
{"ptnr1_auth_comp_id", vS(18, 20)},
{"ptnr1_auth_seq_id", vI(23, 26)},
{"ptnr2_auth_asym_id", vS(52, 52)},
{"ptnr2_auth_comp_id", vS(48, 50)},
{"ptnr2_auth_seq_id", vI(53, 56)},
// { "ptnr1_auth_atom_id", vS(13, 16) },
// { "ptnr2_auth_atom_id", vS(43, 46) },
// { "ptnr1_auth_atom_id", vS(13, 16) },
// { "ptnr2_auth_atom_id", vS(43, 46) },
{"ptnr2_symmetry", sym2},
{"ptnr2_symmetry", sym2},
{"pdbx_dist_value", distance}});
{"pdbx_dist_value", distance}
});
continue;
}
@@ -5166,24 +5194,26 @@ void PDBFileParser::ParseConnectivtyAnnotation()
std::string iCode1str = iCode1 == ' ' ? std::string() : std::string{iCode1};
std::string iCode2str = iCode2 == ' ' ? std::string() : std::string{iCode2};
getCategory("struct_mon_prot_cis")->emplace({{"pdbx_id", serNum},
{"label_comp_id", pep1},
{"label_seq_id", lResSeq1},
{"label_asym_id", lAsym1},
{"label_alt_id", "."},
{"pdbx_PDB_ins_code", iCode1str},
{"auth_comp_id", pep1},
{"auth_seq_id", seqNum1},
{"auth_asym_id", std::string{chainID1}},
{"pdbx_label_comp_id_2", pep2},
{"pdbx_label_seq_id_2", lResSeq2},
{"pdbx_label_asym_id_2", lAsym2},
{"pdbx_PDB_ins_code_2", iCode2str},
{"pdbx_auth_comp_id_2", pep2},
{"pdbx_auth_seq_id_2", seqNum2},
{"pdbx_auth_asym_id_2", std::string{chainID2}},
{"pdbx_PDB_model_num", modNum},
{"pdbx_omega_angle", measure}});
getCategory("struct_mon_prot_cis")->emplace({
{"pdbx_id", serNum},
{"label_comp_id", pep1},
{"label_seq_id", lResSeq1},
{"label_asym_id", lAsym1},
{"label_alt_id", "."},
{"pdbx_PDB_ins_code", iCode1str},
{"auth_comp_id", pep1},
{"auth_seq_id", seqNum1},
{"auth_asym_id", std::string{chainID1}},
{"pdbx_label_comp_id_2", pep2},
{"pdbx_label_seq_id_2", lResSeq2},
{"pdbx_label_asym_id_2", lAsym2},
{"pdbx_PDB_ins_code_2", iCode2str},
{"pdbx_auth_comp_id_2", pep2},
{"pdbx_auth_seq_id_2", seqNum2},
{"pdbx_auth_asym_id_2", std::string{chainID2}},
{"pdbx_PDB_model_num", modNum},
{"pdbx_omega_angle", measure}
});
continue;
}
@@ -5542,27 +5572,38 @@ void PDBFileParser::ParseCoordinate(int modelNr)
}
}
getCategory("atom_site")->emplace({{"group_PDB", groupPDB},
{"id", mAtomID},
{"type_symbol", element},
{"label_atom_id", name},
{"label_alt_id", altLoc != ' ' ? std::string{altLoc} : "."},
{"label_comp_id", resName},
{"label_asym_id", asymID},
{"label_entity_id", entityID},
{"label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "."},
{"pdbx_PDB_ins_code", iCode == ' ' ? "" : std::string{iCode}},
{"Cartn_x", x},
{"Cartn_y", y},
{"Cartn_z", z},
{"occupancy", occupancy},
{"B_iso_or_equiv", tempFactor},
{"pdbx_formal_charge", charge},
{"auth_seq_id", resSeq},
{"auth_comp_id", resName},
{"auth_asym_id", std::string{chainID}},
{"auth_atom_id", name},
{"pdbx_PDB_model_num", modelNr}});
// if the atom is part of a sugar, we need to replace the auth_seq_id/resSeq
if (mSugarEntities.count(entityID))
{
using namespace cif::literals;
auto &branch_scheme = *getCategory("pdbx_branch_scheme");
resSeq = branch_scheme.find1<int>("asym_id"_key == asymID and "auth_seq_num"_key == resSeq, "pdb_seq_num");
}
getCategory("atom_site")->emplace({
{"group_PDB", groupPDB},
{"id", mAtomID},
{"type_symbol", element},
{"label_atom_id", name},
{"label_alt_id", altLoc != ' ' ? std::string{altLoc} : "."},
{"label_comp_id", resName},
{"label_asym_id", asymID},
{"label_entity_id", entityID},
{"label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "."},
{"pdbx_PDB_ins_code", iCode == ' ' ? "" : std::string{iCode}},
{"Cartn_x", x},
{"Cartn_y", y},
{"Cartn_z", z},
{"occupancy", occupancy},
{"B_iso_or_equiv", tempFactor},
{"pdbx_formal_charge", charge},
{"auth_seq_id", resSeq},
{"auth_comp_id", resName},
{"auth_asym_id", std::string{chainID}},
{"auth_atom_id", name},
{"pdbx_PDB_model_num", modelNr}
});
InsertAtomType(element);

View File

@@ -127,15 +127,12 @@ bool Atom::AtomImpl::getAnisoU(float anisou[6]) const
auto cat = mDb.get("atom_site_anisotrop");
if (cat)
{
try
for (auto r : cat->find(cif::Key("id") == mID))
{
auto r = cat->find1(cif::Key("id") == mID);
cif::tie(anisou[0], anisou[1], anisou[2], anisou[3], anisou[4], anisou[5]) =
r.get("U[1][1]", "U[1][2]", "U[1][3]", "U[2][2]", "U[2][3]", "U[3][3]");
result = true;
}
catch (const std::exception &e)
{
break;
}
}
@@ -148,20 +145,10 @@ int Atom::AtomImpl::charge() const
if (not formalCharge.has_value())
{
auto &compound = comp();
auto c = compound();
if (compound.atoms().size() == 1)
formalCharge = compound.atoms().front().charge;
// {
// for (auto cAtom : compound.atoms())
// {
// if (cAtom.id != mAtomID)
// continue;
// formalCharge = cAtom.charge;
// break;
// }
// }
if (c != nullptr and c->atoms().size() == 1)
formalCharge = c->atoms().front().charge;
}
return formalCharge.value_or(0);
@@ -189,23 +176,16 @@ void Atom::AtomImpl::moveTo(const Point &p)
mLocation = p;
}
const Compound &Atom::AtomImpl::comp() const
const Compound *Atom::AtomImpl::compound() const
{
if (mCompound == nullptr)
{
std::string compID;
cif::tie(compID) = mRow.get("label_comp_id");
std::string compID = get_property("label_comp_id");
mCompound = CompoundFactory::instance().create(compID);
if (cif::VERBOSE > 0 and mCompound == nullptr)
std::cerr << "Compound not found: '" << compID << '\'' << std::endl;
}
if (mCompound == nullptr)
throw std::runtime_error("no compound");
return *mCompound;
return mCompound;
}
const std::string Atom::AtomImpl::get_property(const std::string_view name) const
@@ -216,7 +196,7 @@ const std::string Atom::AtomImpl::get_property(const std::string_view name) cons
return ref.as<std::string>();
}
mCachedRefs.emplace_back(name, const_cast<cif::Row&>(mRow)[name]);
mCachedRefs.emplace_back(name, const_cast<cif::Row &>(mRow)[name]);
return std::get<1>(mCachedRefs.back()).as<std::string>();
}
@@ -282,6 +262,21 @@ std::string Atom::pdbID() const
get_property<std::string>("pdbx_PDB_ins_code");
}
const Compound &Atom::compound() const
{
auto result = impl().compound();
if (result == nullptr)
{
if (cif::VERBOSE > 0)
std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << std::endl;
throw std::runtime_error("no compound");
}
return *result;
}
int Atom::charge() const
{
return impl().charge();
@@ -357,8 +352,8 @@ bool Atom::operator==(const Atom &rhs) const
{
if (mImpl == rhs.mImpl)
return true;
if (not (mImpl and rhs.mImpl))
if (not(mImpl and rhs.mImpl))
return false;
return &mImpl->mDb == &rhs.mImpl->mDb and mImpl->mID == rhs.mImpl->mID;
@@ -1130,10 +1125,28 @@ int Polymer::Distance(const Monomer &a, const Monomer &b) const
Sugar::Sugar(const Branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID)
: Residue(branch.structure(), compoundID, asymID, 0, std::to_string(authSeqID))
, mBranch(branch)
, mBranch(&branch)
{
}
Sugar::Sugar(Sugar &&rhs)
: Residue(std::forward<Residue>(rhs))
, mBranch(rhs.mBranch)
{
}
Sugar &Sugar::operator=(Sugar &&rhs)
{
if (this != &rhs)
{
Residue::operator=(std::forward<Residue>(rhs));
mBranch = rhs.mBranch;
}
return *this;
}
// bool Sugar::hasLinkedSugarAtLeavingO(int leavingO) const
// {
// return false;
@@ -1185,21 +1198,19 @@ Branch::Branch(Structure &structure, const std::string &asymID)
auto &db = structure.datablock();
auto &struct_asym = db["struct_asym"];
auto &branch_list = db["pdbx_entity_branch_list"];
auto &branch_scheme = db["pdbx_branch_scheme"];
auto &branch_link = db["pdbx_entity_branch_link"];
for (const auto &[entity_id] : struct_asym.find<std::string>("id"_key == asymID, "entity_id"))
{
for (const auto&[comp_id, num] : branch_list.find<std::string,int>(
"entity_id"_key == entity_id, "comp_id", "num"
))
for (const auto &[comp_id, num] : branch_scheme.find<std::string, int>(
"asym_id"_key == asymID, "mon_id", "pdb_seq_num"))
{
emplace_back(*this, comp_id, asymID, num);
}
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"
))
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
{
if (not cif::iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
@@ -1224,8 +1235,7 @@ void Branch::linkAtoms()
auto entity_id = front().entityID();
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"
))
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
{
if (not cif::iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
@@ -1242,7 +1252,6 @@ std::string Branch::name() const
return empty() ? "" : name(front());
}
std::string Branch::name(const Sugar &s) const
{
using namespace cif::literals;
@@ -1261,19 +1270,18 @@ std::string Branch::name(const Sugar &s) const
if (not result.empty() and result.back() != ']')
result += '-';
return result + s.name();
}
float Branch::weight() const
{
return std::accumulate(begin(), end(), 0.f, [](float sum, const Sugar &s)
{
{
auto compound = mmcif::CompoundFactory::instance().create(s.compoundID());
if (compound)
sum += compound->formulaWeight();
return sum;
});
return sum; });
}
// --------------------------------------------------------------------
@@ -1460,12 +1468,8 @@ void Structure::loadData()
{
auto &polySeqScheme = category("pdbx_poly_seq_scheme");
for (auto &r : polySeqScheme)
for (const auto &[asymID, entityID] : polySeqScheme.rows<std::string,std::string>("asym_id", "entity_id"))
{
std::string asymID, entityID, seqID, monID;
cif::tie(asymID, entityID, seqID, monID) =
r.get("asym_id", "entity_id", "seq_id", "mon_id");
if (mPolymers.empty() or mPolymers.back().asymID() != asymID or mPolymers.back().entityID() != entityID)
mPolymers.emplace_back(*this, entityID, asymID);
}
@@ -1480,14 +1484,8 @@ void Structure::loadData()
auto &nonPolyScheme = category("pdbx_nonpoly_scheme");
for (auto &r : nonPolyScheme)
{
std::string asymID, monID, pdbSeqNum;
cif::tie(asymID, monID, pdbSeqNum) =
r.get("asym_id", "mon_id", "pdb_seq_num");
for (const auto&[asymID, monID, pdbSeqNum] : nonPolyScheme.rows<std::string,std::string,std::string>("asym_id", "mon_id", "pdb_seq_num"))
mNonPolymers.emplace_back(*this, monID, asymID, 0, pdbSeqNum);
}
// place atoms in residues
@@ -1528,7 +1526,7 @@ void Structure::loadData()
{
if (res.asymID() != atom.labelAsymID())
continue;
res.addAtom(atom);
break;
}
@@ -1584,28 +1582,23 @@ EntityType Structure::getEntityTypeForAsymID(const std::string asymID) const
AtomView Structure::waters() const
{
using namespace cif::literals;
AtomView result;
auto &db = datablock();
// Get the entity id for water
// Get the entity id for water. Watch out, structure may not have water at all
auto &entityCat = db["entity"];
std::string waterEntityID;
for (auto &e : entityCat)
for (const auto &[waterEntityID] : entityCat.find<std::string>("type"_key == "water", "id"))
{
std::string id, type;
cif::tie(id, type) = e.get("id", "type");
if (ba::iequals(type, "water"))
for (auto &a : mAtoms)
{
waterEntityID = id;
break;
if (a.get_property<std::string>("label_entity_id") == waterEntityID)
result.push_back(a);
}
}
for (auto &a : mAtoms)
{
if (a.get_property<std::string>("label_entity_id") == waterEntityID)
result.push_back(a);
break;
}
return result;
@@ -1614,7 +1607,7 @@ AtomView Structure::waters() const
Atom Structure::getAtomByID(const std::string &id) const
{
assert(mAtoms.size() == mAtomIndex.size());
int L = 0, R = mAtoms.size() - 1;
while (L <= R)
{
@@ -1711,7 +1704,7 @@ Polymer &Structure::getPolymerByAsymID(const std::string &asymID)
{
if (poly.asymID() != asymID)
continue;
return poly;
}
@@ -1753,7 +1746,15 @@ Residue &Structure::getResidue(const std::string &asymID, int seqID, const std::
}
}
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID) + '-' + authSeqID);
std::string desc = asymID;
if (seqID != 0)
desc += "/" + std::to_string(seqID);
if (not authSeqID.empty())
desc += "-" + authSeqID;
throw std::out_of_range("Could not find residue " + desc);
}
Residue &Structure::getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID)
@@ -1791,10 +1792,18 @@ Residue &Structure::getResidue(const std::string &asymID, const std::string &com
}
}
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID) + '-' + authSeqID);
std::string desc = asymID;
if (seqID != 0)
desc += "/" + std::to_string(seqID);
if (not authSeqID.empty())
desc += "-" + authSeqID;
throw std::out_of_range("Could not find residue " + desc + " of type " + compID);
}
Branch& Structure::getBranchByAsymID(const std::string &asymID)
Branch &Structure::getBranchByAsymID(const std::string &asymID)
{
for (auto &branch : mBranches)
{
@@ -1856,7 +1865,7 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti
// --------------------------------------------------------------------
Atom& Structure::emplace_atom(Atom &&atom)
Atom &Structure::emplace_atom(Atom &&atom)
{
int L = 0, R = mAtomIndex.size() - 1;
while (L <= R)
@@ -1881,7 +1890,7 @@ Atom& Structure::emplace_atom(Atom &&atom)
return mAtoms.emplace_back(std::move(atom));
}
void Structure::removeAtom(Atom &a)
void Structure::removeAtom(Atom &a, bool removeFromResidue)
{
using namespace cif::literals;
@@ -1890,15 +1899,18 @@ void Structure::removeAtom(Atom &a)
auto &atomSites = db["atom_site"];
atomSites.erase("id"_key == a.id());
try
if (removeFromResidue)
{
auto &res = getResidue(a);
res.mAtoms.erase(std::remove(res.mAtoms.begin(), res.mAtoms.end(), a), res.mAtoms.end());
}
catch (const std::exception &ex)
{
if (cif::VERBOSE > 0)
std::cerr << "Error removing atom from residue: " << ex.what() << std::endl;
try
{
auto &res = getResidue(a);
res.mAtoms.erase(std::remove(res.mAtoms.begin(), res.mAtoms.end(), a), res.mAtoms.end());
}
catch (const std::exception &ex)
{
if (cif::VERBOSE > 0)
std::cerr << "Error removing atom from residue: " << ex.what() << std::endl;
}
}
assert(mAtomIndex.size() == mAtoms.size());
@@ -1940,7 +1952,7 @@ void Structure::removeAtom(Atom &a)
R = i - 1;
}
#ifndef NDEBUG
assert(removed);
assert(removed);
#endif
}
@@ -1964,7 +1976,7 @@ void Structure::swapAtoms(Atom a1, Atom a2)
auto l4 = r2["auth_atom_id"];
l3.swap(l4);
}
catch (const std::exception& ex)
catch (const std::exception &ex)
{
std::throw_with_nested(std::runtime_error("Failed to swap atoms"));
}
@@ -2085,19 +2097,16 @@ void Structure::removeResidue(Residue &res)
cif::Datablock &db = datablock();
auto atoms = res.atoms();
for (auto atom : atoms)
removeAtom(atom);
switch (res.entityType())
{
case EntityType::Polymer:
{
Monomer &monomer = dynamic_cast<Monomer&>(res);
Monomer &monomer = dynamic_cast<Monomer &>(res);
db["pdbx_poly_seq_scheme"].erase(
"asym_id"_key == res.asymID() and
"seq_id"_key == res.seqID()
);
"seq_id"_key == res.seqID());
for (auto &poly : mPolymers)
poly.erase(std::remove(poly.begin(), poly.end(), monomer), poly.end());
@@ -2114,14 +2123,106 @@ void Structure::removeResidue(Residue &res)
db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.asymID());
mNonPolymers.erase(std::remove(mNonPolymers.begin(), mNonPolymers.end(), res), mNonPolymers.end());
break;
case EntityType::Branched:
throw std::runtime_error("Don't remove a sugar using removeResidue...");
{
Sugar &sugar = dynamic_cast<Sugar&>(res);
removeSugar(sugar);
atoms.clear();
break;
}
case EntityType::Macrolide:
// TODO: Fix this?
throw std::runtime_error("no support for macrolides yet");
}
for (auto atom : atoms)
removeAtom(atom, false);
}
void Structure::removeSugar(Sugar &sugar)
{
using namespace cif::literals;
std::string asym_id = sugar.asymID();
Branch &branch = getBranchByAsymID(asym_id);
auto si = std::find(branch.begin(), branch.end(), sugar);
if (si == branch.end())
throw std::runtime_error("Sugar not part of branch");
size_t six = si - branch.begin();
if (six == 0) // first sugar, means the death of this branch
removeBranch(branch);
else
{
std::set<size_t> dix;
std::stack<size_t> test;
test.push(sugar.num());
while (not test.empty())
{
auto tix = test.top();
test.pop();
if (dix.count(tix))
continue;
dix.insert(tix);
for (auto atom : branch[tix - 1].atoms())
removeAtom(atom, false);
for (auto &s : branch)
{
if (s.getLinkNr() == tix)
test.push(s.num());
}
}
branch.erase(remove_if(branch.begin(), branch.end(), [dix](const Sugar &s) { return dix.count(s.num()); }), branch.end());
cif::Datablock &db = datablock();
auto entity_id = createEntityForBranch(branch);
// Update the entity id of the asym
auto &struct_asym = db["struct_asym"];
auto r = struct_asym.find1("id"_key == asym_id);
r["entity_id"] = entity_id;
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
atom.set_property("label_entity_id", entity_id);
}
auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
pdbx_branch_scheme.erase("asym_id"_key == asym_id);
for (auto &sugar : branch)
{
pdbx_branch_scheme.emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", sugar.num()},
{"mon_id", sugar.compoundID()},
{"pdb_asym_id", asym_id},
{"pdb_seq_num", sugar.num()},
{"pdb_mon_id", sugar.compoundID()},
// TODO: need fix, collect from nag_atoms?
{"auth_asym_id", asym_id},
{"auth_mon_id", sugar.compoundID()},
{"auth_seq_num", sugar.authSeqID()},
{"hetero", "n"}
});
}
}
}
void Structure::removeBranch(Branch &branch)
@@ -2156,11 +2257,13 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID();
struct_asym.emplace({{"id", asym_id},
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}});
{"details", "?"}
});
std::string comp_id = db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");
@@ -2203,16 +2306,16 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
int ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
pdbx_nonpoly_scheme.emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "mon_id", comp_id },
{ "ndb_seq_num", ndb_nr },
{ "pdb_seq_num", res.authSeqID() },
{ "auth_seq_num", res.authSeqID() },
{ "pdb_mon_id", comp_id },
{ "auth_mon_id", comp_id },
{ "pdb_strand_id", asym_id },
{ "pdb_ins_code", "." },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"mon_id", comp_id},
{"ndb_seq_num", ndb_nr},
{"pdb_seq_num", res.authSeqID()},
{"auth_seq_num", res.authSeqID()},
{"pdb_mon_id", comp_id},
{"auth_mon_id", comp_id},
{"pdb_strand_id", asym_id},
{"pdb_ins_code", "."},
});
return asym_id;
@@ -2231,7 +2334,8 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}});
{"details", "?"}
});
std::string comp_id = db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");
@@ -2241,7 +2345,8 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
@@ -2249,17 +2354,17 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, { "group_PDB", "HETATM"} );
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", comp_id} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", ""} );
appendUnlessSet(atom, { "label_entity_id", entity_id} );
appendUnlessSet(atom, { "auth_comp_id", comp_id} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "auth_seq_id", 1} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_comp_id", comp_id});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_seq_id", ""});
appendUnlessSet(atom, {"label_entity_id", entity_id});
appendUnlessSet(atom, {"auth_comp_id", comp_id});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"auth_seq_id", 1});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
@@ -2270,22 +2375,22 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
int ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
pdbx_nonpoly_scheme.emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "mon_id", comp_id },
{ "ndb_seq_num", ndb_nr },
{ "pdb_seq_num", res.authSeqID() },
{ "auth_seq_num", res.authSeqID() },
{ "pdb_mon_id", comp_id },
{ "auth_mon_id", comp_id },
{ "pdb_strand_id", asym_id },
{ "pdb_ins_code", "." },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"mon_id", comp_id},
{"ndb_seq_num", ndb_nr},
{"pdb_seq_num", res.authSeqID()},
{"auth_seq_num", res.authSeqID()},
{"pdb_mon_id", comp_id},
{"auth_mon_id", comp_id},
{"pdb_strand_id", asym_id},
{"pdb_ins_code", "."},
});
return asym_id;
}
Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
Branch &Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{
// sanity check
for (auto &nag_atom : nag_atoms)
@@ -2311,7 +2416,8 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
@@ -2319,17 +2425,17 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, { "group_PDB", "HETATM"} );
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", "NAG"} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", "."} );
appendUnlessSet(atom, { "label_entity_id", tmp_entity_id} );
appendUnlessSet(atom, { "auth_comp_id", "NAG"} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "auth_seq_id", 1} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_comp_id", "NAG"});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_seq_id", "."});
appendUnlessSet(atom, {"label_entity_id", tmp_entity_id});
appendUnlessSet(atom, {"auth_comp_id", "NAG"});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"auth_seq_id", 1});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
@@ -2345,33 +2451,34 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}});
{"details", "?"}
});
for (auto &a : sugar.atoms())
a.set_property("label_entity_id", entity_id);
db["pdbx_branch_scheme"].emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "num", 1 },
{ "mon_id", "NAG" },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", 1},
{"mon_id", "NAG"},
{ "pdb_asym_id", asym_id },
{ "pdb_seq_num", 1 },
{ "pdb_mon_id", "NAG" },
{"pdb_asym_id", asym_id},
{"pdb_seq_num", 1},
{"pdb_mon_id", "NAG"},
// TODO: need fix, collect from nag_atoms?
{ "auth_asym_id", asym_id },
{ "auth_mon_id", "NAG" },
{ "auth_seq_num", 1 },
{"auth_asym_id", asym_id},
{"auth_mon_id", "NAG"},
{"auth_seq_num", 1},
{ "hetero", "n" }
{"hetero", "n"}
});
return branch;
}
Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info,
Branch &Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info,
int link_sugar, const std::string &link_atom)
{
// sanity check
@@ -2383,7 +2490,7 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
{
if (info.name() != "label_comp_id")
continue;
if (compoundID.empty())
compoundID = info.value();
else if (info.value() != compoundID)
@@ -2402,11 +2509,13 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b) { return b.asymID() == asym_id; });
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b)
{ return b.asymID() == asym_id; });
if (bi == mBranches.end())
throw std::logic_error("Create a branch first!");
@@ -2420,14 +2529,15 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, { "group_PDB", "HETATM"} );
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", compoundID} );
appendUnlessSet(atom, { "label_entity_id", tmp_entity_id} );
appendUnlessSet(atom, { "auth_comp_id", compoundID} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_comp_id", compoundID});
appendUnlessSet(atom, {"label_entity_id", tmp_entity_id});
appendUnlessSet(atom, {"auth_comp_id", compoundID});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
@@ -2439,6 +2549,11 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
auto entity_id = createEntityForBranch(branch);
// Update the entity id of the asym
auto &struct_asym = db["struct_asym"];
auto r = struct_asym.find1("id"_key == asym_id);
r["entity_id"] = entity_id;
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
@@ -2451,24 +2566,24 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
for (auto &sugar : branch)
{
pdbx_branch_scheme.emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "num", sugar.num() },
{ "mon_id", sugar.compoundID() },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", sugar.num()},
{"mon_id", sugar.compoundID()},
{ "pdb_asym_id", asym_id },
{ "pdb_seq_num", sugar.num() },
{ "pdb_mon_id", sugar.compoundID() },
{"pdb_asym_id", asym_id},
{"pdb_seq_num", sugar.num()},
{"pdb_mon_id", sugar.compoundID()},
// TODO: need fix, collect from nag_atoms?
{ "auth_asym_id", asym_id },
{ "auth_mon_id", sugar.compoundID() },
{ "auth_seq_num", sugar.num() },
{"auth_asym_id", asym_id},
{"auth_mon_id", sugar.compoundID()},
{"auth_seq_num", sugar.authSeqID()},
{ "hetero", "n" }
{"hetero", "n"}
});
}
return branch;
}
@@ -2484,61 +2599,54 @@ std::string Structure::createEntityForBranch(Branch &branch)
{
entityID = entity.find1<std::string>("type"_key == "branched" and "pdbx_description"_key == entityName, "id");
}
catch(const std::exception& e)
catch (const std::exception &e)
{
entityID = entity.getUniqueID("");
if (cif::VERBOSE)
std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << std::endl;
entity.emplace({
{ "id", entityID },
{ "type", "branched" },
{ "src_method", "man" },
{ "pdbx_description", entityName },
{ "formula_weight", branch.weight() }
});
}
entity.emplace({{"id", entityID},
{"type", "branched"},
{"src_method", "man"},
{"pdbx_description", entityName},
{"formula_weight", branch.weight()}});
auto &pdbx_entity_branch_list = mDb["pdbx_entity_branch_list"];
for (auto &sugar : branch)
{
pdbx_entity_branch_list.emplace({
{"entity_id", entityID},
{"comp_id", sugar.compoundID()},
{"num", sugar.num()},
{"hetero", "n"}
});
}
auto &pdbx_entity_branch_list = mDb["pdbx_entity_branch_list"];
pdbx_entity_branch_list.erase("entity_id"_key == entityID);
auto &pdbx_entity_branch_link = mDb["pdbx_entity_branch_link"];
for (auto &s1 : branch)
{
auto l2 = s1.getLink();
for (auto &sugar : branch)
{
pdbx_entity_branch_list.emplace({
{ "entity_id", entityID },
{ "comp_id", sugar.compoundID() },
{ "num", sugar.num() },
{ "hetero", "n" }
});
}
if (not l2)
continue;
auto &pdbx_entity_branch_link = mDb["pdbx_entity_branch_link"];
pdbx_entity_branch_link.erase("entity_id"_key == entityID);
auto &s2 = branch.at(std::stoi(l2.authSeqID()) - 1);
auto l1 = s2.atomByID("C1");
for (auto &s1 : branch)
{
auto l1 = s1.getLink();
if (not l1)
continue;
auto &s2 = branch.at(std::stoi(l1.authSeqID()) - 1);
auto l2 = s2.atomByID("C1");
pdbx_entity_branch_link.emplace({
{ "link_id", pdbx_entity_branch_link.getUniqueID("") },
{ "entity_id", entityID },
{ "entity_branch_list_num_1", s2.authSeqID() },
{ "comp_id_1", s2.compoundID() },
{ "atom_id_1", l2.labelAtomID() },
{ "leaving_atom_id_1", "O1" },
{ "entity_branch_list_num_2", s1.authSeqID() },
{ "comp_id_2", s1.compoundID() },
{ "atom_id_2", l1.labelAtomID() },
{ "leaving_atom_id_2", "H" + l1.labelAtomID() },
{ "value_order", "sing" }
});
pdbx_entity_branch_link.emplace({
{"link_id", pdbx_entity_branch_link.getUniqueID("")},
{"entity_id", entityID},
{"entity_branch_list_num_1", s1.authSeqID()},
{"comp_id_1", s1.compoundID()},
{"atom_id_1", l1.labelAtomID()},
{"leaving_atom_id_1", "O1"},
{"entity_branch_list_num_2", s2.authSeqID()},
{"comp_id_2", s2.compoundID()},
{"atom_id_2", l2.labelAtomID()},
{"leaving_atom_id_2", "H" + l2.labelAtomID()},
{"value_order", "sing"}
});
}
}
return entityID;
@@ -2665,7 +2773,7 @@ void Structure::validateAtoms() const
assert(i != atoms.end());
atoms.erase(i);
};
for (auto &poly : mPolymers)
{
for (auto &monomer : poly)

View File

@@ -165,4 +165,35 @@ BOOST_AUTO_TEST_CASE(create_sugar_2)
BOOST_CHECK_EQUAL(bN.size(), 2);
file.save(gTestDir / "test-create_sugar_2.cif");
BOOST_CHECK_NO_THROW(mmcif::Structure s2(file));
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(delete_sugar_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
// Get branch for H
auto &bG = s.getBranchByAsymID("G");
BOOST_CHECK_EQUAL(bG.size(), 4);
s.removeResidue(bG[1]);
BOOST_CHECK_EQUAL(bG.size(), 1);
auto &bN = s.getBranchByAsymID("G");
BOOST_CHECK_EQUAL(bN.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
BOOST_CHECK_EQUAL(bN.size(), 1);
file.save(gTestDir / "test-create_sugar_3.cif");
BOOST_CHECK_NO_THROW(mmcif::Structure s2(file));
}