Compare commits

...

11 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
0ef8eb59f8 Fix scattering factors error 2022-05-18 13:04:42 +02:00
Maarten L. Hekkelman
b5fe4a9a87 locating resources that might be protected 2022-05-18 11:53:13 +02:00
Maarten L. Hekkelman
11fea31b98 more loading resources 2022-05-18 11:37:26 +02:00
Maarten L. Hekkelman
f629275ed5 locating resources that might be protected 2022-05-18 11:25:47 +02:00
Maarten L. Hekkelman
a5f6166469 locating resources that might be protected 2022-05-18 11:14:14 +02:00
Maarten L. Hekkelman
501050e591 Add move constructor to mmcif::Structure 2022-05-10 17:11:04 +02:00
Maarten L. Hekkelman
e1b240b2b2 sugar work 2022-05-04 16:48:28 +02:00
Maarten L. Hekkelman
3d79278ed7 Merge branch 'trunk' into develop 2022-05-04 09:51:15 +02:00
Maarten L. Hekkelman
5e0b197a43 mmcif::Atom::compound() revision 2022-05-04 09:50:24 +02:00
Maarten L. Hekkelman
af721eb196 Make having no compound less fatal 2022-05-02 14:40:22 +02:00
Maarten L. Hekkelman
788e315f5e Fix entity_branch_link entry 2022-05-02 12:24:35 +02:00
9 changed files with 418 additions and 223 deletions

1
.gitignore vendored
View File

@@ -13,3 +13,4 @@ msvc/
Testing/
rsrc/feature-request.txt
test/1cbs.cif
test/test-create_sugar_?.cif

View File

@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16)
# set the project name
project(cifpp VERSION 4.0.1 LANGUAGES CXX)
project(cifpp VERSION 4.1.1 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

View File

@@ -1,3 +1,11 @@
Version 4.1.1
- Fall back to the zero-charge scattering factors if the requested
charge was not found in the table for this atom type.
- Improve code to locate resources, failing less.
Version 4.1.0
- Some interface changes for mmcif::Atom
Version 4.0.1
- Added a bunch of const methods to Datablock and Category.
- Changed PDB writing interface to accept Datablock instead of File.

View File

@@ -80,7 +80,7 @@ class Atom
void moveTo(const Point &p);
const Compound &comp() const;
const Compound *compound() const;
const std::string get_property(const std::string_view name) const;
void set_property(const std::string_view name, const std::string &value);
@@ -186,7 +186,7 @@ class Atom
bool isSymmetryCopy() const { return impl().mSymmetryCopy; }
std::string symmetry() const { return impl().mSymmetryOperator; }
const Compound &comp() const { return impl().comp(); }
const Compound &compound() const;
bool isWater() const { return impl().mCompID == "HOH" or impl().mCompID == "H2O" or impl().mCompID == "WAT"; }
int charge() const;
@@ -527,6 +527,9 @@ class Sugar : public Residue
Sugar(const Branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID);
Sugar(Sugar &&rhs);
Sugar &operator=(Sugar &&rhs);
int num() const { return std::stoi(mAuthSeqID); }
std::string name() const;
@@ -534,9 +537,14 @@ class Sugar : public Residue
Atom getLink() const { return mLink; }
void setLink(Atom link) { mLink = link; }
// Number of the sugar this one is linked to, taken from the auth_seq_id of
// the link atom. Returns 0 when no link atom has been set.
size_t getLinkNr() const
{
	if (not mLink)
		return 0;

	const int linkedNr = std::stoi(mLink.authSeqID());
	return linkedNr;
}
private:
const Branch &mBranch;
const Branch *mBranch;
Atom mLink;
};
@@ -623,10 +631,14 @@ class Structure
Structure(cif::Datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
Structure(Structure &&s) = default;
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
Structure(const Structure &);
Structure &operator=(const Structure &) = delete;
Structure &operator=(Structure &&s) = default;
~Structure();
const AtomView &atoms() const { return mAtoms; }
@@ -710,7 +722,11 @@ class Structure
}
// Actions
void removeAtom(Atom &a);
// Remove atom a from this structure. Forwards to the two-argument overload
// with removeFromResidue set to true.
void removeAtom(Atom &a)
{
removeAtom(a, true);
}
void swapAtoms(Atom a1, Atom a2); // swap the labels for these atoms
void moveAtom(Atom a, Point p); // move atom to a new location
void changeResidue(Residue &res, const std::string &newCompound,
@@ -815,6 +831,9 @@ class Structure
Atom &emplace_atom(Atom &&atom);
void removeAtom(Atom &a, bool removeFromResidue);
void removeSugar(Sugar &sugar);
cif::Datablock &mDb;
size_t mModelNr;
AtomView mAtoms;

View File

@@ -1083,6 +1083,20 @@ auto AtomTypeTraits::wksf(int charge) const -> const SFData&
return sf.sf;
}
if (charge != 0)
{
// Oops, not found. Fall back to zero charge and see if we can use that
if (cif::VERBOSE > 0)
std::cerr << "No scattering factor found for " << name() << " with charge " << charge << " will try to fall back to zero charge..." << std::endl;
for (auto& sf: data::kWKSFData)
{
if (sf.symbol == mInfo->type and sf.charge == 0)
return sf.sf;
}
}
throw std::runtime_error("No scattering factor found for " + name() + std::to_string(charge));
}

View File

@@ -1204,15 +1204,18 @@ std::filesystem::path gDataDir;
void addDataDirectory(std::filesystem::path dataDir)
{
if (VERBOSE > 0 and not fs::exists(dataDir))
std::cerr << "The specified data directory " << dataDir << " does not exist" << std::endl;
gDataDir = dataDir;
std::error_code ec;
if (fs::exists(dataDir, ec))
gDataDir = dataDir;
else if (VERBOSE > 0)
std::cerr << "The specified data directory " << dataDir << " cannot be used: " << ec.message() << std::endl;
}
void addFileResource(const std::string &name, std::filesystem::path dataFile)
{
if (not fs::exists(dataFile))
throw std::runtime_error("Attempt to add a file resource for " + name + " that does not exist: " + dataFile.string());
std::error_code ec;
if (not fs::exists(dataFile, ec) or ec)
throw std::runtime_error("Attempt to add a file resource for " + name + " that cannot be used (" + dataFile.string() + ") :" + ec.message());
gLocalResources[name] = dataFile;
}
@@ -1220,42 +1223,51 @@ void addFileResource(const std::string &name, std::filesystem::path dataFile)
std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
{
std::unique_ptr<std::istream> result;
std::error_code ec;
fs::path p = name;
if (gLocalResources.count(name.string()))
{
std::unique_ptr<std::ifstream> file(new std::ifstream(gLocalResources[name.string()], std::ios::binary));
if (file->is_open())
result.reset(file.release());
try
{
std::unique_ptr<std::ifstream> file(new std::ifstream(gLocalResources[name.string()], std::ios::binary));
if (file->is_open())
result.reset(file.release());
}
catch (...) {}
}
if (not result and not fs::exists(p) and not gDataDir.empty())
p = gDataDir / name;
if (not result and (not fs::exists(p, ec) or ec) and not gDataDir.empty())
{
auto p2 = gDataDir / p;
if (fs::exists(p2, ec) and not ec)
swap(p, p2);
}
#if defined(CACHE_DIR)
if (not result and not fs::exists(p))
if (not result and (not fs::exists(p, ec) or ec))
{
auto p2 = fs::path(CACHE_DIR) / p;
if (fs::exists(p2))
if (fs::exists(p2, ec) and not ec)
swap(p, p2);
}
#endif
#if defined(DATA_DIR)
if (not result and not fs::exists(p))
if (not result and (not fs::exists(p, ec) or ec))
{
auto p2 = fs::path(DATA_DIR) / p;
if (fs::exists(p2))
if (fs::exists(p2, ec) and not ec)
swap(p, p2);
}
#endif
#if defined(CCP4) and CCP4
if (not result and not fs::exists(p))
if (not result and (not fs::exists(p, ec) or ec))
{
const char* CCP4_DIR = getenv("CCP4");
if (CCP4_DIR != nullptr and fs::exists(CCP4_DIR))
if (CCP4_DIR != nullptr and fs::exists(CCP4_DIR, ec) and not ec)
{
auto p2 = fs::path(DATA_DIR) / p;
if (fs::exists(p2))
@@ -1264,11 +1276,15 @@ std::unique_ptr<std::istream> loadResource(std::filesystem::path name)
}
#endif
if (not result and fs::exists(p))
if (not result and fs::exists(p, ec) and not ec)
{
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
if (file->is_open())
result.reset(file.release());
try
{
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
if (file->is_open())
result.reset(file.release());
}
catch (...) {}
}
if (not result and gResourceData)

View File

@@ -389,6 +389,8 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
mValidators.emplace_back(dictionary, *data);
else
{
std::error_code ec;
// might be a compressed dictionary on disk
fs::path p = dictionary;
if (p.extension() == ".dic")
@@ -397,12 +399,12 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
p = p.parent_path() / (p.filename().string() + ".dic.gz");
#if defined(CACHE_DIR) and defined(DATA_DIR)
if (not fs::exists(p))
if (not fs::exists(p, ec) or ec)
{
for (const char *dir : {CACHE_DIR, DATA_DIR})
{
auto p2 = fs::path(dir) / p;
if (fs::exists(p2))
if (fs::exists(p2, ec) and not ec)
{
swap(p, p2);
break;
@@ -411,7 +413,7 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)
}
#endif
if (fs::exists(p))
if (fs::exists(p, ec) and not ec)
{
std::ifstream file(p, std::ios::binary);
if (not file.is_open())

View File

@@ -148,20 +148,10 @@ int Atom::AtomImpl::charge() const
if (not formalCharge.has_value())
{
auto &compound = comp();
auto c = compound();
if (compound.atoms().size() == 1)
formalCharge = compound.atoms().front().charge;
// {
// for (auto cAtom : compound.atoms())
// {
// if (cAtom.id != mAtomID)
// continue;
// formalCharge = cAtom.charge;
// break;
// }
// }
if (c != nullptr and c->atoms().size() == 1)
formalCharge = c->atoms().front().charge;
}
return formalCharge.value_or(0);
@@ -189,23 +179,16 @@ void Atom::AtomImpl::moveTo(const Point &p)
mLocation = p;
}
const Compound &Atom::AtomImpl::comp() const
const Compound *Atom::AtomImpl::compound() const
{
if (mCompound == nullptr)
{
std::string compID;
cif::tie(compID) = mRow.get("label_comp_id");
std::string compID = get_property("label_comp_id");
mCompound = CompoundFactory::instance().create(compID);
if (cif::VERBOSE > 0 and mCompound == nullptr)
std::cerr << "Compound not found: '" << compID << '\'' << std::endl;
}
if (mCompound == nullptr)
throw std::runtime_error("no compound");
return *mCompound;
return mCompound;
}
const std::string Atom::AtomImpl::get_property(const std::string_view name) const
@@ -216,7 +199,7 @@ const std::string Atom::AtomImpl::get_property(const std::string_view name) cons
return ref.as<std::string>();
}
mCachedRefs.emplace_back(name, const_cast<cif::Row&>(mRow)[name]);
mCachedRefs.emplace_back(name, const_cast<cif::Row &>(mRow)[name]);
return std::get<1>(mCachedRefs.back()).as<std::string>();
}
@@ -282,6 +265,21 @@ std::string Atom::pdbID() const
get_property<std::string>("pdbx_PDB_ins_code");
}
// Return the compound for this atom by reference.
//
// The lookup itself is delegated to the implementation object, which hands
// back a pointer. A null pointer is turned into a std::runtime_error here,
// after reporting the offending label_comp_id on std::cerr when
// cif::VERBOSE is greater than zero.
const Compound &Atom::compound() const
{
	if (const Compound *found = impl().compound(); found != nullptr)
		return *found;

	if (cif::VERBOSE > 0)
		std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << std::endl;

	throw std::runtime_error("no compound");
}
int Atom::charge() const
{
return impl().charge();
@@ -357,8 +355,8 @@ bool Atom::operator==(const Atom &rhs) const
{
if (mImpl == rhs.mImpl)
return true;
if (not (mImpl and rhs.mImpl))
if (not(mImpl and rhs.mImpl))
return false;
return &mImpl->mDb == &rhs.mImpl->mDb and mImpl->mID == rhs.mImpl->mID;
@@ -1130,10 +1128,28 @@ int Polymer::Distance(const Monomer &a, const Monomer &b) const
Sugar::Sugar(const Branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID)
: Residue(branch.structure(), compoundID, asymID, 0, std::to_string(authSeqID))
, mBranch(branch)
, mBranch(&branch)
{
}
// Move constructor.
//
// Takes over the Residue part of rhs, the pointer to the owning branch and
// the link atom. The link atom must travel with the sugar: without it a
// moved-to Sugar reports no link (getLink() is empty, getLinkNr() is 0).
Sugar::Sugar(Sugar &&rhs)
	: Residue(std::move(rhs)) // only the Residue subobject is moved from here
	, mBranch(rhs.mBranch)
	, mLink(std::move(rhs.mLink))
{
}
// Move assignment.
//
// Takes over the Residue part of rhs, the pointer to the owning branch and
// the link atom. Self-assignment is a no-op. The link atom has to be
// transferred as well: algorithms that shuffle sugars inside a branch
// (e.g. remove_if followed by erase) go through this operator, and a sugar
// that loses its link atom reports 0 from getLinkNr() afterwards.
Sugar &Sugar::operator=(Sugar &&rhs)
{
	if (this != &rhs)
	{
		// only the Residue subobject is moved from by the base assignment
		Residue::operator=(std::move(rhs));
		mBranch = rhs.mBranch;
		mLink = std::move(rhs.mLink);
	}

	return *this;
}
// bool Sugar::hasLinkedSugarAtLeavingO(int leavingO) const
// {
// return false;
@@ -1190,16 +1206,14 @@ Branch::Branch(Structure &structure, const std::string &asymID)
for (const auto &[entity_id] : struct_asym.find<std::string>("id"_key == asymID, "entity_id"))
{
for (const auto&[comp_id, num] : branch_list.find<std::string,int>(
"entity_id"_key == entity_id, "comp_id", "num"
))
for (const auto &[comp_id, num] : branch_list.find<std::string, int>(
"entity_id"_key == entity_id, "comp_id", "num"))
{
emplace_back(*this, comp_id, asymID, num);
}
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"
))
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
{
if (not cif::iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
@@ -1224,8 +1238,7 @@ void Branch::linkAtoms()
auto entity_id = front().entityID();
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"
))
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
{
if (not cif::iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
@@ -1242,7 +1255,6 @@ std::string Branch::name() const
return empty() ? "" : name(front());
}
std::string Branch::name(const Sugar &s) const
{
using namespace cif::literals;
@@ -1261,19 +1273,18 @@ std::string Branch::name(const Sugar &s) const
if (not result.empty() and result.back() != ']')
result += '-';
return result + s.name();
}
float Branch::weight() const
{
return std::accumulate(begin(), end(), 0.f, [](float sum, const Sugar &s)
{
{
auto compound = mmcif::CompoundFactory::instance().create(s.compoundID());
if (compound)
sum += compound->formulaWeight();
return sum;
});
return sum; });
}
// --------------------------------------------------------------------
@@ -1528,7 +1539,7 @@ void Structure::loadData()
{
if (res.asymID() != atom.labelAsymID())
continue;
res.addAtom(atom);
break;
}
@@ -1614,7 +1625,7 @@ AtomView Structure::waters() const
Atom Structure::getAtomByID(const std::string &id) const
{
assert(mAtoms.size() == mAtomIndex.size());
int L = 0, R = mAtoms.size() - 1;
while (L <= R)
{
@@ -1711,7 +1722,7 @@ Polymer &Structure::getPolymerByAsymID(const std::string &asymID)
{
if (poly.asymID() != asymID)
continue;
return poly;
}
@@ -1794,7 +1805,7 @@ Residue &Structure::getResidue(const std::string &asymID, const std::string &com
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID) + '-' + authSeqID);
}
Branch& Structure::getBranchByAsymID(const std::string &asymID)
Branch &Structure::getBranchByAsymID(const std::string &asymID)
{
for (auto &branch : mBranches)
{
@@ -1856,7 +1867,7 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti
// --------------------------------------------------------------------
Atom& Structure::emplace_atom(Atom &&atom)
Atom &Structure::emplace_atom(Atom &&atom)
{
int L = 0, R = mAtomIndex.size() - 1;
while (L <= R)
@@ -1881,7 +1892,7 @@ Atom& Structure::emplace_atom(Atom &&atom)
return mAtoms.emplace_back(std::move(atom));
}
void Structure::removeAtom(Atom &a)
void Structure::removeAtom(Atom &a, bool removeFromResidue)
{
using namespace cif::literals;
@@ -1890,15 +1901,18 @@ void Structure::removeAtom(Atom &a)
auto &atomSites = db["atom_site"];
atomSites.erase("id"_key == a.id());
try
if (removeFromResidue)
{
auto &res = getResidue(a);
res.mAtoms.erase(std::remove(res.mAtoms.begin(), res.mAtoms.end(), a), res.mAtoms.end());
}
catch (const std::exception &ex)
{
if (cif::VERBOSE > 0)
std::cerr << "Error removing atom from residue: " << ex.what() << std::endl;
try
{
auto &res = getResidue(a);
res.mAtoms.erase(std::remove(res.mAtoms.begin(), res.mAtoms.end(), a), res.mAtoms.end());
}
catch (const std::exception &ex)
{
if (cif::VERBOSE > 0)
std::cerr << "Error removing atom from residue: " << ex.what() << std::endl;
}
}
assert(mAtomIndex.size() == mAtoms.size());
@@ -1940,7 +1954,7 @@ void Structure::removeAtom(Atom &a)
R = i - 1;
}
#ifndef NDEBUG
assert(removed);
assert(removed);
#endif
}
@@ -1964,7 +1978,7 @@ void Structure::swapAtoms(Atom a1, Atom a2)
auto l4 = r2["auth_atom_id"];
l3.swap(l4);
}
catch (const std::exception& ex)
catch (const std::exception &ex)
{
std::throw_with_nested(std::runtime_error("Failed to swap atoms"));
}
@@ -2085,19 +2099,16 @@ void Structure::removeResidue(Residue &res)
cif::Datablock &db = datablock();
auto atoms = res.atoms();
for (auto atom : atoms)
removeAtom(atom);
switch (res.entityType())
{
case EntityType::Polymer:
{
Monomer &monomer = dynamic_cast<Monomer&>(res);
Monomer &monomer = dynamic_cast<Monomer &>(res);
db["pdbx_poly_seq_scheme"].erase(
"asym_id"_key == res.asymID() and
"seq_id"_key == res.seqID()
);
"seq_id"_key == res.seqID());
for (auto &poly : mPolymers)
poly.erase(std::remove(poly.begin(), poly.end(), monomer), poly.end());
@@ -2114,14 +2125,106 @@ void Structure::removeResidue(Residue &res)
db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.asymID());
mNonPolymers.erase(std::remove(mNonPolymers.begin(), mNonPolymers.end(), res), mNonPolymers.end());
break;
case EntityType::Branched:
throw std::runtime_error("Don't remove a sugar using removeResidue...");
{
Sugar &sugar = dynamic_cast<Sugar&>(res);
removeSugar(sugar);
atoms.clear();
break;
}
case EntityType::Macrolide:
// TODO: Fix this?
throw std::runtime_error("no support for macrolides yet");
}
for (auto atom : atoms)
removeAtom(atom, false);
}
// Remove a sugar, together with every sugar that is (transitively) linked to
// it, from the branch it belongs to and update the datablock accordingly.
//
// If the sugar is the first one in its branch the whole branch is removed via
// removeBranch(). Otherwise:
//  - the atoms of all affected sugars are removed,
//  - the affected sugars are erased from the branch,
//  - createEntityForBranch() supplies the entity id for what is left,
//  - struct_asym, the label_entity_id of the remaining atoms and
//    pdbx_branch_scheme are rewritten to use that entity id.
//
// Throws std::runtime_error when the sugar is not found in the branch for its
// asym id, and std::out_of_range when a sugar number does not correspond to a
// position in the branch.
void Structure::removeSugar(Sugar &sugar)
{
	using namespace cif::literals;

	std::string asym_id = sugar.asymID();
	Branch &branch = getBranchByAsymID(asym_id);

	auto si = std::find(branch.begin(), branch.end(), sugar);
	if (si == branch.end())
		throw std::runtime_error("Sugar not part of branch");

	if (si == branch.begin()) // first sugar, means the death of this branch
	{
		removeBranch(branch);
		return;
	}

	// Collect the number of the sugar to delete and of every sugar linked to
	// an already collected one, removing their atoms along the way.
	std::set<size_t> doomed;
	std::stack<size_t> pending;

	pending.push(sugar.num());

	while (not pending.empty())
	{
		auto nr = pending.top();
		pending.pop();

		// insert() reports whether this number was seen before
		if (not doomed.insert(nr).second)
			continue;

		// Sugar numbers are used as 1-based positions in the branch; at()
		// (as used in createEntityForBranch) throws instead of reading out
		// of bounds when that does not hold.
		for (auto atom : branch.at(nr - 1).atoms())
			removeAtom(atom, false);

		for (auto &s : branch)
		{
			if (s.getLinkNr() == nr)
				pending.push(s.num());
		}
	}

	// From here on the `sugar` parameter must not be used any more: erase
	// shifts or destroys the element it refers to.
	branch.erase(
		std::remove_if(branch.begin(), branch.end(),
			[&doomed](const Sugar &s)
			{ return doomed.count(s.num()) > 0; }),
		branch.end());

	cif::Datablock &db = datablock();

	auto entity_id = createEntityForBranch(branch);

	// Update the entity id of the asym
	auto &struct_asym = db["struct_asym"];
	auto r = struct_asym.find1("id"_key == asym_id);
	r["entity_id"] = entity_id;

	for (auto &remaining : branch)
	{
		for (auto atom : remaining.atoms())
			atom.set_property("label_entity_id", entity_id);
	}

	// Rebuild the branch scheme for this asym from the sugars that are left
	auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
	pdbx_branch_scheme.erase("asym_id"_key == asym_id);

	for (auto &remaining : branch)
	{
		pdbx_branch_scheme.emplace({
			{"asym_id", asym_id},
			{"entity_id", entity_id},
			{"num", remaining.num()},
			{"mon_id", remaining.compoundID()},

			{"pdb_asym_id", asym_id},
			{"pdb_seq_num", remaining.num()},
			{"pdb_mon_id", remaining.compoundID()},

			// TODO: need fix, collect from nag_atoms?
			{"auth_asym_id", asym_id},
			{"auth_mon_id", remaining.compoundID()},
			{"auth_seq_num", remaining.num()},

			{"hetero", "n"}
		});
	}
}
void Structure::removeBranch(Branch &branch)
@@ -2172,8 +2275,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
{
auto atom_id = atom_site.getUniqueID("");
auto &&[row, inserted] = atom_site.emplace({
{"group_PDB", atom.get_property<std::string>("group_PDB")},
auto &&[row, inserted] = atom_site.emplace({{"group_PDB", atom.get_property<std::string>("group_PDB")},
{"id", atom_id},
{"type_symbol", atom.get_property<std::string>("type_symbol")},
{"label_atom_id", atom.get_property<std::string>("label_atom_id")},
@@ -2193,8 +2295,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
{"auth_comp_id", comp_id},
{"auth_asym_id", asym_id},
{"auth_atom_id", atom.get_property<std::string>("label_atom_id")},
{"pdbx_PDB_model_num", 1}
});
{"pdbx_PDB_model_num", 1}});
auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
res.addAtom(newAtom);
@@ -2203,16 +2304,16 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
int ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
pdbx_nonpoly_scheme.emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "mon_id", comp_id },
{ "ndb_seq_num", ndb_nr },
{ "pdb_seq_num", res.authSeqID() },
{ "auth_seq_num", res.authSeqID() },
{ "pdb_mon_id", comp_id },
{ "auth_mon_id", comp_id },
{ "pdb_strand_id", asym_id },
{ "pdb_ins_code", "." },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"mon_id", comp_id},
{"ndb_seq_num", ndb_nr},
{"pdb_seq_num", res.authSeqID()},
{"auth_seq_num", res.authSeqID()},
{"pdb_mon_id", comp_id},
{"auth_mon_id", comp_id},
{"pdb_strand_id", asym_id},
{"pdb_ins_code", "."},
});
return asym_id;
@@ -2226,8 +2327,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID();
struct_asym.emplace({
{"id", asym_id},
struct_asym.emplace({{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
@@ -2241,7 +2341,8 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
@@ -2249,17 +2350,17 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, { "group_PDB", "HETATM"} );
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", comp_id} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", ""} );
appendUnlessSet(atom, { "label_entity_id", entity_id} );
appendUnlessSet(atom, { "auth_comp_id", comp_id} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "auth_seq_id", 1} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_comp_id", comp_id});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_seq_id", ""});
appendUnlessSet(atom, {"label_entity_id", entity_id});
appendUnlessSet(atom, {"auth_comp_id", comp_id});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"auth_seq_id", 1});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
@@ -2270,22 +2371,22 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
int ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
pdbx_nonpoly_scheme.emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "mon_id", comp_id },
{ "ndb_seq_num", ndb_nr },
{ "pdb_seq_num", res.authSeqID() },
{ "auth_seq_num", res.authSeqID() },
{ "pdb_mon_id", comp_id },
{ "auth_mon_id", comp_id },
{ "pdb_strand_id", asym_id },
{ "pdb_ins_code", "." },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"mon_id", comp_id},
{"ndb_seq_num", ndb_nr},
{"pdb_seq_num", res.authSeqID()},
{"auth_seq_num", res.authSeqID()},
{"pdb_mon_id", comp_id},
{"auth_mon_id", comp_id},
{"pdb_strand_id", asym_id},
{"pdb_ins_code", "."},
});
return asym_id;
}
Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
Branch &Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{
// sanity check
for (auto &nag_atom : nag_atoms)
@@ -2311,7 +2412,8 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
@@ -2319,17 +2421,17 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, { "group_PDB", "HETATM"} );
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", "NAG"} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", "."} );
appendUnlessSet(atom, { "label_entity_id", tmp_entity_id} );
appendUnlessSet(atom, { "auth_comp_id", "NAG"} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "auth_seq_id", 1} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_comp_id", "NAG"});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_seq_id", "."});
appendUnlessSet(atom, {"label_entity_id", tmp_entity_id});
appendUnlessSet(atom, {"auth_comp_id", "NAG"});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"auth_seq_id", 1});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
@@ -2345,33 +2447,34 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}});
{"details", "?"}
});
for (auto &a : sugar.atoms())
a.set_property("label_entity_id", entity_id);
db["pdbx_branch_scheme"].emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "num", 1 },
{ "mon_id", "NAG" },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", 1},
{"mon_id", "NAG"},
{ "pdb_asym_id", asym_id },
{ "pdb_seq_num", 1 },
{ "pdb_mon_id", "NAG" },
{"pdb_asym_id", asym_id},
{"pdb_seq_num", 1},
{"pdb_mon_id", "NAG"},
// TODO: need fix, collect from nag_atoms?
{ "auth_asym_id", asym_id },
{ "auth_mon_id", "NAG" },
{ "auth_seq_num", 1 },
{"auth_asym_id", asym_id},
{"auth_mon_id", "NAG"},
{"auth_seq_num", 1},
{ "hetero", "n" }
{"hetero", "n"}
});
return branch;
}
Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info,
Branch &Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info,
int link_sugar, const std::string &link_atom)
{
// sanity check
@@ -2383,7 +2486,7 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
{
if (info.name() != "label_comp_id")
continue;
if (compoundID.empty())
compoundID = info.value();
else if (info.value() != compoundID)
@@ -2402,11 +2505,13 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b) { return b.asymID() == asym_id; });
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b)
{ return b.asymID() == asym_id; });
if (bi == mBranches.end())
throw std::logic_error("Create a branch first!");
@@ -2420,14 +2525,15 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, { "group_PDB", "HETATM"} );
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", compoundID} );
appendUnlessSet(atom, { "label_entity_id", tmp_entity_id} );
appendUnlessSet(atom, { "auth_comp_id", compoundID} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_comp_id", compoundID});
appendUnlessSet(atom, {"label_entity_id", tmp_entity_id});
appendUnlessSet(atom, {"auth_comp_id", compoundID});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
@@ -2439,6 +2545,11 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
auto entity_id = createEntityForBranch(branch);
// Update the entity id of the asym
auto &struct_asym = db["struct_asym"];
auto r = struct_asym.find1("id"_key == asym_id);
r["entity_id"] = entity_id;
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
@@ -2451,24 +2562,24 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
for (auto &sugar : branch)
{
pdbx_branch_scheme.emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "num", sugar.num() },
{ "mon_id", sugar.compoundID() },
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", sugar.num()},
{"mon_id", sugar.compoundID()},
{ "pdb_asym_id", asym_id },
{ "pdb_seq_num", sugar.num() },
{ "pdb_mon_id", sugar.compoundID() },
{"pdb_asym_id", asym_id},
{"pdb_seq_num", sugar.num()},
{"pdb_mon_id", sugar.compoundID()},
// TODO: need fix, collect from nag_atoms?
{ "auth_asym_id", asym_id },
{ "auth_mon_id", sugar.compoundID() },
{ "auth_seq_num", sugar.num() },
{"auth_asym_id", asym_id},
{"auth_mon_id", sugar.compoundID()},
{"auth_seq_num", sugar.num()},
{ "hetero", "n" }
{"hetero", "n"}
});
}
return branch;
}
@@ -2484,61 +2595,54 @@ std::string Structure::createEntityForBranch(Branch &branch)
{
entityID = entity.find1<std::string>("type"_key == "branched" and "pdbx_description"_key == entityName, "id");
}
catch(const std::exception& e)
catch (const std::exception &e)
{
entityID = entity.getUniqueID("");
if (cif::VERBOSE)
std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << std::endl;
entity.emplace({
{ "id", entityID },
{ "type", "branched" },
{ "src_method", "man" },
{ "pdbx_description", entityName },
{ "formula_weight", branch.weight() }
});
}
entity.emplace({{"id", entityID},
{"type", "branched"},
{"src_method", "man"},
{"pdbx_description", entityName},
{"formula_weight", branch.weight()}});
auto &pdbx_entity_branch_list = mDb["pdbx_entity_branch_list"];
for (auto &sugar : branch)
{
pdbx_entity_branch_list.emplace({
{"entity_id", entityID},
{"comp_id", sugar.compoundID()},
{"num", sugar.num()},
{"hetero", "n"}
});
}
auto &pdbx_entity_branch_list = mDb["pdbx_entity_branch_list"];
pdbx_entity_branch_list.erase("entity_id"_key == entityID);
auto &pdbx_entity_branch_link = mDb["pdbx_entity_branch_link"];
for (auto &s1 : branch)
{
auto l2 = s1.getLink();
for (auto &sugar : branch)
{
pdbx_entity_branch_list.emplace({
{ "entity_id", entityID },
{ "comp_id", sugar.compoundID() },
{ "num", sugar.num() },
{ "hetero", "n" }
});
}
if (not l2)
continue;
auto &pdbx_entity_branch_link = mDb["pdbx_entity_branch_link"];
pdbx_entity_branch_link.erase("entity_id"_key == entityID);
auto &s2 = branch.at(std::stoi(l2.authSeqID()) - 1);
auto l1 = s2.atomByID("C1");
for (auto &s1 : branch)
{
auto l1 = s1.getLink();
if (not l1)
continue;
auto &s2 = branch.at(std::stoi(l1.authSeqID()) - 1);
auto l2 = s2.atomByID("C1");
pdbx_entity_branch_link.emplace({
{ "link_id", pdbx_entity_branch_link.getUniqueID("") },
{ "entity_id", entityID },
{ "entity_branch_list_num_1", s2.authSeqID() },
{ "comp_id_1", s2.compoundID() },
{ "atom_id_1", l2.labelAtomID() },
{ "leaving_atom_id_1", "O1" },
{ "entity_branch_list_num_2", s1.authSeqID() },
{ "comp_id_2", s1.compoundID() },
{ "atom_id_2", l1.labelAtomID() },
{ "leaving_atom_id_2", "H" + l1.labelAtomID() },
{ "value_order", "sing" }
});
pdbx_entity_branch_link.emplace({
{"link_id", pdbx_entity_branch_link.getUniqueID("")},
{"entity_id", entityID},
{"entity_branch_list_num_1", s1.authSeqID()},
{"comp_id_1", s1.compoundID()},
{"atom_id_1", l1.labelAtomID()},
{"leaving_atom_id_1", "O1"},
{"entity_branch_list_num_2", s2.authSeqID()},
{"comp_id_2", s2.compoundID()},
{"atom_id_2", l2.labelAtomID()},
{"leaving_atom_id_2", "H" + l2.labelAtomID()},
{"value_order", "sing"}
});
}
}
return entityID;
@@ -2665,7 +2769,7 @@ void Structure::validateAtoms() const
assert(i != atoms.end());
atoms.erase(i);
};
for (auto &poly : mPolymers)
{
for (auto &monomer : poly)

View File

@@ -165,4 +165,35 @@ BOOST_AUTO_TEST_CASE(create_sugar_2)
BOOST_CHECK_EQUAL(bN.size(), 2);
file.save(gTestDir / "test-create_sugar_2.cif");
BOOST_CHECK_NO_THROW(mmcif::Structure s2(file));
}
// --------------------------------------------------------------------
// Remove a sugar from a branch through Structure::removeResidue and check
// that the branch shrinks, is renamed, and that the saved file can be loaded
// into a Structure again.
BOOST_AUTO_TEST_CASE(delete_sugar_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
// Get branch for G
auto &bG = s.getBranchByAsymID("G");
BOOST_CHECK_EQUAL(bG.size(), 4);
// Removing the second sugar is expected to leave only the first one
s.removeResidue(bG[1]);
BOOST_CHECK_EQUAL(bG.size(), 1);
// Look the branch up again and check its name and size
auto &bN = s.getBranchByAsymID("G");
BOOST_CHECK_EQUAL(bN.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
BOOST_CHECK_EQUAL(bN.size(), 1);
// The modified file should still be loadable
file.save(gTestDir / "test-create_sugar_3.cif");
BOOST_CHECK_NO_THROW(mmcif::Structure s2(file));
}