mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
initial pass at ZBO support.
does not yet handle the HYD and ZCH lines
This commit is contained in:
@@ -154,6 +154,7 @@ double Bond::getBondTypeAsDouble() const {
|
||||
case AROMATIC: return 1.5; break;
|
||||
case DATIVEONE: return 1.0; break; // FIX: this should probably be different
|
||||
case DATIVE: return 1.0; break; //FIX: again probably wrong
|
||||
case ZERO: return 0; break;
|
||||
default:
|
||||
UNDER_CONSTRUCTION("Bad bond type");
|
||||
}
|
||||
@@ -187,6 +188,7 @@ double Bond::getValenceContrib(const Atom *atom) const {
|
||||
if(atom->getIdx()==getEndAtomIdx())return 1.0;
|
||||
else return 0.0;
|
||||
break;
|
||||
case ZERO: return 0; break;
|
||||
default:
|
||||
UNDER_CONSTRUCTION("Bad bond type");
|
||||
|
||||
|
||||
@@ -71,7 +71,8 @@ namespace RDKit{
|
||||
DATIVE, //!< standard two-electron dative
|
||||
DATIVEL, //!< standard two-electron dative
|
||||
DATIVER, //!< standard two-electron dative
|
||||
OTHER
|
||||
OTHER,
|
||||
ZERO //!< Zero-order bond (from http://pubs.acs.org/doi/abs/10.1021/ci200488k)
|
||||
} BondType;
|
||||
|
||||
//! the bond's direction (for chirality)
|
||||
|
||||
@@ -479,19 +479,19 @@ namespace Canon {
|
||||
// Here's how the black magic works:
|
||||
// - non-ring atom neighbors have their original ranks
|
||||
// - ring atom neighbors have this added to their ranks:
|
||||
// (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS
|
||||
// (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS
|
||||
// - ring-closure neighbors lose a factor of:
|
||||
// (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS
|
||||
// (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS
|
||||
//
|
||||
// This tactic biases us to traverse to non-ring neighbors first,
|
||||
// original ordering if bond orders are all equal... crafty, neh?
|
||||
//
|
||||
// ---------------------
|
||||
if( colors[otherIdx] == GREY_NODE ) {
|
||||
rank -= static_cast<int>(Bond::OTHER+1) *
|
||||
rank -= static_cast<int>(MAX_BONDTYPE+1) *
|
||||
MAX_NATOMS*MAX_NATOMS;
|
||||
if(!bondSymbols){
|
||||
rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
|
||||
rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
|
||||
MAX_NATOMS;
|
||||
} else {
|
||||
const std::string &symb=(*bondSymbols)[theBond->getIdx()];
|
||||
@@ -500,7 +500,7 @@ namespace Canon {
|
||||
}
|
||||
} else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){
|
||||
if(!bondSymbols){
|
||||
rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
|
||||
rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
|
||||
MAX_NATOMS*MAX_NATOMS;
|
||||
} else {
|
||||
const std::string &symb=(*bondSymbols)[theBond->getIdx()];
|
||||
|
||||
@@ -22,6 +22,7 @@ namespace RDKit {
|
||||
namespace Canon {
|
||||
const int MAX_NATOMS=5000; //!< used in the canonical traversal code
|
||||
const int MAX_CYCLES=99; //!< used in the canonical traversal code
|
||||
const int MAX_BONDTYPE=32; //!< used in the canonical traversal code
|
||||
|
||||
//! used in traversals of the molecule
|
||||
typedef enum {
|
||||
|
||||
@@ -571,6 +571,57 @@ namespace RDKit{
|
||||
}
|
||||
}
|
||||
|
||||
//! Parses an "M  ZBO" (zero-order bond) property line and applies it to \c mol.
/*!
  Layout read by this function: the 6-character tag "M  ZBO", a 3-character
  entry count, then for each entry a 4-character 1-based bond index followed
  by an optional 4-character bond order. A missing or blank order means 0,
  which marks the bond as Bond::ZERO; any other order is cast directly to
  Bond::BondType.

  \param mol   molecule whose bonds are updated; must not be null
  \param text  full text of the property line
  \param line  line number of \c text, used only in error messages

  \throws FileParseException if a field cannot be converted to an integer,
          the line holds fewer entries than its count claims, a bond index
          is zero or larger than the number of bonds, or a bond order is not
          a usable Bond::BondType value.
*/
void ParseZBOLine(RWMol *mol, const std::string &text,unsigned int line){
  PRECONDITION(mol,"bad mol");
  // the tag occupies exactly six columns, so the literal needs both spaces
  PRECONDITION(text.substr(0,6)==std::string("M  ZBO"),"bad ZBO line");

  unsigned int nent;
  try {
    nent = FileParserUtils::stripSpacesAndCast<unsigned int>(text.substr(6,3));
  }
  catch (boost::bad_lexical_cast &) {
    std::ostringstream errout;
    errout << "Cannot convert " << text.substr(6,3) << " to int on line "<<line;
    throw FileParseException(errout.str()) ;
  }

  unsigned int spos = 9;
  for (unsigned int ie = 0; ie < nent; ie++) {
    // substr() throws std::out_of_range when spos is past the end of the
    // line; report a truncated line as a parse error instead.
    if(spos>=text.size()){
      std::ostringstream errout;
      errout << "Bad ZBO specification on line "<<line
             <<": expected "<<nent<<" entries, found "<<ie;
      throw FileParseException(errout.str()) ;
    }

    unsigned int bid=0;
    unsigned int order=0;
    try {
      bid = FileParserUtils::stripSpacesAndCast<unsigned int>(text.substr(spos,4));
      // from here on spos points at the order field, so the handler below
      // reports whichever field actually failed to convert
      spos += 4;
      if(text.size()>=spos+4){
        const std::string orderField=text.substr(spos,4);
        // an all-blank order field is treated the same as a missing one
        if(orderField.find_first_not_of(' ')!=std::string::npos){
          order = FileParserUtils::stripSpacesAndCast<unsigned int>(orderField);
        }
      }
    }
    catch (boost::bad_lexical_cast &) {
      std::ostringstream errout;
      errout << "Cannot convert " << text.substr(spos,4) << " to int on line "<<line;
      throw FileParseException(errout.str()) ;
    }
    spos += 4;

    if(!bid || bid>mol->getNumBonds() ){
      std::ostringstream errout;
      errout << "Bad ZBO specification on line "<<line;
      throw FileParseException(errout.str()) ;
    }
    // NOTE(review): assumes Bond::ZERO, the final enumerator of
    // Bond::BondType, is also its largest value - confirm against the enum.
    if(order>static_cast<unsigned int>(Bond::ZERO)){
      std::ostringstream errout;
      errout << "Bad bond order "<<order<<" in ZBO specification on line "<<line;
      throw FileParseException(errout.str()) ;
    }

    // bond indices in the file are 1-based
    --bid;
    Bond *bnd=mol->getBondWithIdx(bid);
    if(!bnd){
      std::ostringstream errout;
      errout << "Bond "<<bid<<" from ZBO specification on line "<<line<<" not found";
      throw FileParseException(errout.str()) ;
    }
    if(order==0){
      bnd->setBondType(Bond::ZERO);
    } else {
      bnd->setBondType(static_cast<Bond::BondType>(order));
    }
  }
}
|
||||
|
||||
void ParseNewAtomList(RWMol *mol,const std::string &text,unsigned int line){
|
||||
if(text.size()<15){
|
||||
std::ostringstream errout;
|
||||
@@ -1214,6 +1265,7 @@ namespace RDKit{
|
||||
else if(lineBeg=="M STY") {
|
||||
ParseSGroup2000STYLine(mol, tempStr,line);
|
||||
}
|
||||
else if(lineBeg=="M ZBO") ParseZBOLine(mol,tempStr,line);
|
||||
line++;
|
||||
tempStr = getLine(inStream);
|
||||
lineBeg=tempStr.substr(0,6);
|
||||
|
||||
@@ -235,6 +235,23 @@ namespace RDKit{
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
const std::string GetMolFileZBOInfo(const RWMol &mol){
|
||||
std::stringstream res;
|
||||
std::stringstream ss;
|
||||
unsigned int nEntries=0;
|
||||
for(ROMol::ConstBondIterator bondIt=mol.beginBonds();
|
||||
bondIt!=mol.endBonds();++bondIt){
|
||||
if((*bondIt)->getBondType()==Bond::ZERO){
|
||||
++nEntries;
|
||||
ss<<" "<<std::setw(3)<<(*bondIt)->getIdx()+1<<" "<<std::setw(3)<<0;
|
||||
}
|
||||
}
|
||||
if(nEntries){
|
||||
res<<"M ZBO"<<std::setw(3)<<nEntries<<ss.str()<<std::endl;
|
||||
}
|
||||
return res.str();
|
||||
}
|
||||
|
||||
|
||||
const std::string AtomGetMolFileSymbol(const Atom *atom){
|
||||
PRECONDITION(atom,"");
|
||||
@@ -397,6 +414,7 @@ namespace RDKit{
|
||||
break;
|
||||
case Bond::TRIPLE: res=" 3";break;
|
||||
case Bond::AROMATIC: res=" 4";break;
|
||||
case Bond::ZERO: res=" 1";break;
|
||||
default: res=" 0";break;
|
||||
}
|
||||
return res;
|
||||
@@ -590,6 +608,7 @@ namespace RDKit{
|
||||
res += GetMolFileRGroupInfo(tmol);
|
||||
res += GetMolFileQueryInfo(tmol);
|
||||
res += GetMolFileAliasInfo(tmol);
|
||||
res += GetMolFileZBOInfo(tmol);
|
||||
|
||||
// FIX: R-group logic, SGroups and 3D features etc.
|
||||
res += "M END\n";
|
||||
|
||||
@@ -3257,6 +3257,62 @@ void testGithub166(){
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void testZBO(){
|
||||
BOOST_LOG(rdInfoLog) << "testing ZBO parsing" << std::endl;
|
||||
std::string rdbase = getenv("RDBASE");
|
||||
rdbase += "/Code/GraphMol/FileParsers/test_data/";
|
||||
|
||||
{
|
||||
std::string fName;
|
||||
fName = rdbase+"FeCO5.mol";
|
||||
ROMol *m=MolFileToMol(fName);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==11);
|
||||
TEST_ASSERT(m->getNumBonds()==10);
|
||||
TEST_ASSERT(m->getBondWithIdx(0)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(1)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(2)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
|
||||
}
|
||||
{
|
||||
std::string fName;
|
||||
fName = rdbase+"CrBz.mol";
|
||||
ROMol *m=MolFileToMol(fName);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==7);
|
||||
TEST_ASSERT(m->getNumBonds()==12);
|
||||
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(8)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(9)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(10)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(11)->getBondType()==Bond::ZERO);
|
||||
}
|
||||
{
|
||||
std::string fName;
|
||||
fName = rdbase+"CrBz2.mol";
|
||||
ROMol *m=MolFileToMol(fName);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==13);
|
||||
TEST_ASSERT(m->getNumBonds()==24);
|
||||
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(8)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(9)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(10)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(11)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(18)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(19)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(20)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(21)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(22)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(23)->getBondType()==Bond::ZERO);
|
||||
}
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc,char *argv[]){
|
||||
@@ -3320,11 +3376,12 @@ int main(int argc,char *argv[]){
|
||||
testMolFileTotalValence();
|
||||
testGithub88();
|
||||
testGithub82();
|
||||
#endif
|
||||
testMolFileWithHs();
|
||||
testMolFileWithRxn();
|
||||
testPDBFile();
|
||||
testGithub166();
|
||||
#endif
|
||||
testZBO();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -768,6 +768,42 @@ void testMolFileWithRxn(){
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
void testZBO(){
|
||||
BOOST_LOG(rdInfoLog) << "testing handling of ZBO specs" << std::endl;
|
||||
std::string rdbase = getenv("RDBASE");
|
||||
rdbase += "/Code/GraphMol/FileParsers/test_data/";
|
||||
|
||||
{
|
||||
std::string fName;
|
||||
fName = rdbase+"FeCO5.mol";
|
||||
ROMol *m=MolFileToMol(fName);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==11);
|
||||
TEST_ASSERT(m->getNumBonds()==10);
|
||||
TEST_ASSERT(m->getBondWithIdx(0)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(1)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(2)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
|
||||
|
||||
std::string mb=MolToMolBlock(*m);
|
||||
delete m;
|
||||
m = MolBlockToMol(mb);
|
||||
TEST_ASSERT(m);
|
||||
TEST_ASSERT(m->getNumAtoms()==11);
|
||||
TEST_ASSERT(m->getNumBonds()==10);
|
||||
TEST_ASSERT(m->getBondWithIdx(0)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(1)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(2)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
|
||||
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
|
||||
|
||||
delete m;
|
||||
}
|
||||
BOOST_LOG(rdInfoLog) << "done" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
int main() {
|
||||
RDLog::InitLogs();
|
||||
#if 1
|
||||
@@ -858,4 +894,8 @@ int main() {
|
||||
testMolFileWithRxn();
|
||||
BOOST_LOG(rdInfoLog) << "-----------------------------------------\n\n";
|
||||
|
||||
BOOST_LOG(rdInfoLog) << "-----------------------------------------\n";
|
||||
testZBO();
|
||||
BOOST_LOG(rdInfoLog) << "-----------------------------------------\n\n";
|
||||
|
||||
}
|
||||
|
||||
25
Code/GraphMol/FileParsers/test_data/CrBz.mol
Normal file
25
Code/GraphMol/FileParsers/test_data/CrBz.mol
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
Mrv0541 11301306482D
|
||||
|
||||
7 12 0 0 0 0 999 V2000
|
||||
5.0679 1.7384 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3534 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3534 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.0679 0.0884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.7823 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.7823 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.8286 0.7955 0.0000 Cr 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
2 3 2 0 0 0 0
|
||||
3 4 1 0 0 0 0
|
||||
4 5 2 0 0 0 0
|
||||
5 6 1 0 0 0 0
|
||||
1 6 2 0 0 0 0
|
||||
7 2 1 0 0 0 0
|
||||
7 3 1 0 0 0 0
|
||||
7 4 1 0 0 0 0
|
||||
7 1 1 0 0 0 0
|
||||
7 6 1 0 0 0 0
|
||||
7 5 1 0 0 0 0
|
||||
M ZBO 6 7 0 8 0 9 0 10 0 11 0 12 0
|
||||
M END
|
||||
44
Code/GraphMol/FileParsers/test_data/CrBz2.mol
Normal file
44
Code/GraphMol/FileParsers/test_data/CrBz2.mol
Normal file
@@ -0,0 +1,44 @@
|
||||
|
||||
Mrv0541 11301306502D
|
||||
|
||||
13 24 0 0 0 0 999 V2000
|
||||
5.0679 1.7384 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3534 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
4.3534 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.0679 0.0884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.7823 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
5.7823 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
2.8286 0.7955 0.0000 Cr 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.7366 1.7089 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.0221 1.2964 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.0221 0.4714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.7366 0.0589 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.4511 0.4714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.4511 1.2964 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
2 3 2 0 0 0 0
|
||||
3 4 1 0 0 0 0
|
||||
4 5 2 0 0 0 0
|
||||
5 6 1 0 0 0 0
|
||||
1 6 2 0 0 0 0
|
||||
7 2 1 0 0 0 0
|
||||
7 3 1 0 0 0 0
|
||||
7 4 1 0 0 0 0
|
||||
7 1 1 0 0 0 0
|
||||
7 6 1 0 0 0 0
|
||||
7 5 1 0 0 0 0
|
||||
8 9 1 0 0 0 0
|
||||
9 10 2 0 0 0 0
|
||||
10 11 1 0 0 0 0
|
||||
11 12 2 0 0 0 0
|
||||
12 13 1 0 0 0 0
|
||||
8 13 2 0 0 0 0
|
||||
13 7 1 0 0 0 0
|
||||
12 7 1 0 0 0 0
|
||||
11 7 1 0 0 0 0
|
||||
10 7 1 0 0 0 0
|
||||
9 7 1 0 0 0 0
|
||||
8 7 1 0 0 0 0
|
||||
M ZBO 6 7 0 8 0 9 0 10 0 11 0 12 0
|
||||
M ZBO 6 19 0 20 0 21 0 22 0 23 0 24 0
|
||||
M END
|
||||
27
Code/GraphMol/FileParsers/test_data/FeCO5.mol
Normal file
27
Code/GraphMol/FileParsers/test_data/FeCO5.mol
Normal file
@@ -0,0 +1,27 @@
|
||||
|
||||
Mrv0541 11301306102D
|
||||
|
||||
11 10 0 0 0 0 999 V2000
|
||||
-0.3380 0.2050 0.0000 Fe 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0.4870 0.2050 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.0524 -0.2075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.0524 0.6175 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1.3120 0.2050 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.7669 -0.6200 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-1.7669 1.0300 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.3380 1.0300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.3380 -0.6200 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.3380 1.8550 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
-0.3380 -1.4450 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
1 2 1 0 0 0 0
|
||||
1 3 1 0 0 0 0
|
||||
1 4 1 0 0 0 0
|
||||
2 5 2 0 0 0 0
|
||||
3 6 2 0 0 0 0
|
||||
4 7 2 0 0 0 0
|
||||
1 8 1 0 0 0 0
|
||||
1 9 1 0 0 0 0
|
||||
8 10 2 0 0 0 0
|
||||
9 11 2 0 0 0 0
|
||||
M ZBO 5 1 0 2 0 3 0 7 0 8 0
|
||||
M END
|
||||
@@ -224,6 +224,7 @@ struct bond_wrapper {
|
||||
.value("DATIVEL",Bond::DATIVEL)
|
||||
.value("DATIVER",Bond::DATIVER)
|
||||
.value("OTHER",Bond::OTHER)
|
||||
.value("ZERO",Bond::ZERO)
|
||||
;
|
||||
python::enum_<Bond::BondDir>("BondDir")
|
||||
.value("NONE",Bond::NONE)
|
||||
|
||||
Reference in New Issue
Block a user