Initial pass at ZBO (zero-order bond) support.

Does not yet handle the HYD and ZCH lines.
This commit is contained in:
Greg Landrum
2013-11-30 06:58:03 +01:00
parent 0dc4a58ebb
commit 3b0c72c4bf
12 changed files with 276 additions and 7 deletions

View File

@@ -154,6 +154,7 @@ double Bond::getBondTypeAsDouble() const {
case AROMATIC: return 1.5; break;
case DATIVEONE: return 1.0; break; // FIX: this should probably be different
case DATIVE: return 1.0; break; //FIX: again probably wrong
case ZERO: return 0; break;
default:
UNDER_CONSTRUCTION("Bad bond type");
}
@@ -187,6 +188,7 @@ double Bond::getValenceContrib(const Atom *atom) const {
if(atom->getIdx()==getEndAtomIdx())return 1.0;
else return 0.0;
break;
case ZERO: return 0; break;
default:
UNDER_CONSTRUCTION("Bad bond type");

View File

@@ -71,7 +71,8 @@ namespace RDKit{
DATIVE, //!< standard two-electron dative
DATIVEL, //!< standard two-electron dative
DATIVER, //!< standard two-electron dative
OTHER
OTHER,
ZERO //!< Zero-order bond (from http://pubs.acs.org/doi/abs/10.1021/ci200488k)
} BondType;
//! the bond's direction (for chirality)

View File

@@ -479,19 +479,19 @@ namespace Canon {
// Here's how the black magic works:
// - non-ring atom neighbors have their original ranks
// - ring atom neighbors have this added to their ranks:
// (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS
// (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS
// - ring-closure neighbors lose a factor of:
// (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS
// (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS
//
// This tactic biases us to traverse to non-ring neighbors first,
// original ordering if bond orders are all equal... crafty, neh?
//
// ---------------------
if( colors[otherIdx] == GREY_NODE ) {
rank -= static_cast<int>(Bond::OTHER+1) *
rank -= static_cast<int>(MAX_BONDTYPE+1) *
MAX_NATOMS*MAX_NATOMS;
if(!bondSymbols){
rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
MAX_NATOMS;
} else {
const std::string &symb=(*bondSymbols)[theBond->getIdx()];
@@ -500,7 +500,7 @@ namespace Canon {
}
} else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){
if(!bondSymbols){
rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
MAX_NATOMS*MAX_NATOMS;
} else {
const std::string &symb=(*bondSymbols)[theBond->getIdx()];

View File

@@ -22,6 +22,7 @@ namespace RDKit {
namespace Canon {
const int MAX_NATOMS=5000; //!< used in the canonical traversal code
const int MAX_CYCLES=99; //!< used in the canonical traversal code
const int MAX_BONDTYPE=32; //!< used in the canonical traversal code
//! used in traversals of the molecule
typedef enum {

View File

@@ -571,6 +571,57 @@ namespace RDKit{
}
}
//! Parses an "M  ZBO" (zero-order bond) property line and updates the bonds of \c mol
/*!
   Layout read by this function: the 6-character tag, a 3-character entry
   count, then one (bond index, bond order) pair per entry, each field being
   4 characters wide. Bond indices on the line are 1-based. A blank or missing
   order field is treated as order 0.

   \param mol   molecule whose bonds are modified; must not be null
   \param text  the full text of the property line
   \param line  line number, used only to build error messages

   \throws FileParseException if a field cannot be converted to an unsigned
           integer, if the line holds fewer entries than it announces, or if
           a bond index is zero or larger than the number of bonds
*/
void ParseZBOLine(RWMol *mol, const std::string &text,unsigned int line){
  PRECONDITION(mol,"bad mol");
  // the tag occupies six characters: 'M', two spaces, "ZBO"
  PRECONDITION(text.substr(0,6)==std::string("M  ZBO"),"bad ZBO line");

  unsigned int nent;
  try {
    nent = FileParserUtils::stripSpacesAndCast<unsigned int>(text.substr(6,3));
  }
  catch (boost::bad_lexical_cast &) {
    std::ostringstream errout;
    errout << "Cannot convert " << text.substr(6,3) << " to int on line "<<line;
    throw FileParseException(errout.str()) ;
  }

  unsigned int spos = 9;
  for (unsigned int ie = 0; ie < nent; ie++) {
    // std::string::substr() throws std::out_of_range when the start position
    // lies past the end of the string; report a truncated line as a parse
    // error instead of letting that exception escape.
    if(spos>=text.size()){
      std::ostringstream errout;
      errout << "ZBO specification on line "<<line<<" is too short for "<<nent<<" entries";
      throw FileParseException(errout.str()) ;
    }
    const std::string bidField=text.substr(spos,4);
    spos += 4;

    // the order field may be absent (or cut short) at the end of the line
    std::string orderField;
    if(spos<text.size()){
      orderField=text.substr(spos,4);
    }
    spos += 4;

    unsigned int bid=0;
    unsigned int order=0;
    // tracks which field is being converted so the error names the right one
    const std::string *field=&bidField;
    try {
      bid = FileParserUtils::stripSpacesAndCast<unsigned int>(bidField);
      if(orderField.find_first_not_of(' ')!=std::string::npos){
        field=&orderField;
        order = FileParserUtils::stripSpacesAndCast<unsigned int>(orderField);
      }
    }
    catch (boost::bad_lexical_cast &) {
      std::ostringstream errout;
      errout << "Cannot convert " << *field << " to int on line "<<line;
      throw FileParseException(errout.str()) ;
    }

    if(!bid || bid>mol->getNumBonds() ){
      std::ostringstream errout;
      errout << "Bad ZBO specification on line "<<line;
      throw FileParseException(errout.str()) ;
    }
    --bid; // convert to the 0-based index used by the molecule

    Bond *bnd=mol->getBondWithIdx(bid);
    if(!bnd){
      std::ostringstream errout;
      errout << "Bond "<<bid<<" from ZBO specification on line "<<line<<" not found";
      throw FileParseException(errout.str()) ;
    }
    if(order==0){
      bnd->setBondType(Bond::ZERO);
    } else {
      // NOTE(review): a non-zero order is cast straight to the enum without
      // range validation - confirm which values are meant to be accepted.
      bnd->setBondType(static_cast<Bond::BondType>(order));
    }
  }
}
void ParseNewAtomList(RWMol *mol,const std::string &text,unsigned int line){
if(text.size()<15){
std::ostringstream errout;
@@ -1214,6 +1265,7 @@ namespace RDKit{
else if(lineBeg=="M STY") {
ParseSGroup2000STYLine(mol, tempStr,line);
}
else if(lineBeg=="M ZBO") ParseZBOLine(mol,tempStr,line);
line++;
tempStr = getLine(inStream);
lineBeg=tempStr.substr(0,6);

View File

@@ -235,6 +235,23 @@ namespace RDKit{
return ss.str();
}
//! Builds the "M  ZBO" property line(s) describing the zero-order bonds of \c mol
/*!
   Every bond whose type is Bond::ZERO contributes one entry made of its
   1-based bond index and the order 0, each written as a space followed by a
   3-character field.

   Entries are emitted in groups of at most 8 per line so that a line never
   grows past 6 + 3 + 8*8 = 73 characters.
   NOTE(review): 8 entries per line is the usual V2000 property-block
   convention - confirm that it is also what ZBO consumers expect.

   \param mol  the molecule to inspect
   \return the property line(s), each terminated by a newline, or an empty
           string if the molecule has no zero-order bonds
*/
const std::string GetMolFileZBOInfo(const RWMol &mol){
  const unsigned int maxEntriesPerLine=8;
  std::stringstream res;
  std::stringstream entries;
  unsigned int nEntries=0;
  for(ROMol::ConstBondIterator bondIt=mol.beginBonds();
      bondIt!=mol.endBonds();++bondIt){
    if((*bondIt)->getBondType()!=Bond::ZERO){
      continue;
    }
    ++nEntries;
    entries<<" "<<std::setw(3)<<(*bondIt)->getIdx()+1<<" "<<std::setw(3)<<0;
    if(nEntries==maxEntriesPerLine){
      // line is full: flush it and start collecting the next one
      res<<"M  ZBO"<<std::setw(3)<<nEntries<<entries.str()<<'\n';
      nEntries=0;
      entries.str("");
    }
  }
  if(nEntries){
    res<<"M  ZBO"<<std::setw(3)<<nEntries<<entries.str()<<'\n';
  }
  return res.str();
}
const std::string AtomGetMolFileSymbol(const Atom *atom){
PRECONDITION(atom,"");
@@ -397,6 +414,7 @@ namespace RDKit{
break;
case Bond::TRIPLE: res=" 3";break;
case Bond::AROMATIC: res=" 4";break;
case Bond::ZERO: res=" 1";break;
default: res=" 0";break;
}
return res;
@@ -590,6 +608,7 @@ namespace RDKit{
res += GetMolFileRGroupInfo(tmol);
res += GetMolFileQueryInfo(tmol);
res += GetMolFileAliasInfo(tmol);
res += GetMolFileZBOInfo(tmol);
// FIX: R-group logic, SGroups and 3D features etc.
res += "M END\n";

View File

@@ -3257,6 +3257,62 @@ void testGithub166(){
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
void testZBO(){
BOOST_LOG(rdInfoLog) << "testing ZBO parsing" << std::endl;
std::string rdbase = getenv("RDBASE");
rdbase += "/Code/GraphMol/FileParsers/test_data/";
{
std::string fName;
fName = rdbase+"FeCO5.mol";
ROMol *m=MolFileToMol(fName);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms()==11);
TEST_ASSERT(m->getNumBonds()==10);
TEST_ASSERT(m->getBondWithIdx(0)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(1)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(2)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
}
{
std::string fName;
fName = rdbase+"CrBz.mol";
ROMol *m=MolFileToMol(fName);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms()==7);
TEST_ASSERT(m->getNumBonds()==12);
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(8)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(9)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(10)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(11)->getBondType()==Bond::ZERO);
}
{
std::string fName;
fName = rdbase+"CrBz2.mol";
ROMol *m=MolFileToMol(fName);
TEST_ASSERT(m);
TEST_ASSERT(m->getNumAtoms()==13);
TEST_ASSERT(m->getNumBonds()==24);
TEST_ASSERT(m->getBondWithIdx(6)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(7)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(8)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(9)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(10)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(11)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(18)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(19)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(20)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(21)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(22)->getBondType()==Bond::ZERO);
TEST_ASSERT(m->getBondWithIdx(23)->getBondType()==Bond::ZERO);
}
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
int main(int argc,char *argv[]){
@@ -3320,11 +3376,12 @@ int main(int argc,char *argv[]){
testMolFileTotalValence();
testGithub88();
testGithub82();
#endif
testMolFileWithHs();
testMolFileWithRxn();
testPDBFile();
testGithub166();
#endif
testZBO();
return 0;
}

View File

@@ -768,6 +768,42 @@ void testMolFileWithRxn(){
BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
//! Checks that zero-order bonds survive a mol file -> mol block -> molecule round trip.
/*!
   Reads FeCO5.mol from the FileParsers test_data directory below $RDBASE,
   verifies its counts and zero-order bonds, writes it to a mol block, parses
   that block again and repeats the same checks on the result.
*/
void testZBO(){
  BOOST_LOG(rdInfoLog) << "testing handling of ZBO specs" << std::endl;
  // constructing a std::string from a null pointer is undefined behaviour,
  // so fail cleanly when RDBASE is not set
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  rdbase += "/Code/GraphMol/FileParsers/test_data/";

  {
    const unsigned int zeroBonds[]={0,1,2,6,7};
    const unsigned int nZeroBonds=sizeof(zeroBonds)/sizeof(zeroBonds[0]);

    std::string fName = rdbase+"FeCO5.mol";
    ROMol *m=MolFileToMol(fName);
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms()==11);
    TEST_ASSERT(m->getNumBonds()==10);
    for(unsigned int i=0;i<nZeroBonds;++i){
      TEST_ASSERT(m->getBondWithIdx(zeroBonds[i])->getBondType()==Bond::ZERO);
    }

    // write the molecule out and read it back in
    std::string mb=MolToMolBlock(*m);
    delete m;
    m = MolBlockToMol(mb);
    TEST_ASSERT(m);
    TEST_ASSERT(m->getNumAtoms()==11);
    TEST_ASSERT(m->getNumBonds()==10);
    for(unsigned int i=0;i<nZeroBonds;++i){
      TEST_ASSERT(m->getBondWithIdx(zeroBonds[i])->getBondType()==Bond::ZERO);
    }
    delete m;
  }

  BOOST_LOG(rdInfoLog) << "done" << std::endl;
}
int main() {
RDLog::InitLogs();
#if 1
@@ -858,4 +894,8 @@ int main() {
testMolFileWithRxn();
BOOST_LOG(rdInfoLog) << "-----------------------------------------\n\n";
BOOST_LOG(rdInfoLog) << "-----------------------------------------\n";
testZBO();
BOOST_LOG(rdInfoLog) << "-----------------------------------------\n\n";
}

View File

@@ -0,0 +1,25 @@
Mrv0541 11301306482D
7 12 0 0 0 0 999 V2000
5.0679 1.7384 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3534 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3534 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0679 0.0884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.7823 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.7823 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.8286 0.7955 0.0000 Cr 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
2 3 2 0 0 0 0
3 4 1 0 0 0 0
4 5 2 0 0 0 0
5 6 1 0 0 0 0
1 6 2 0 0 0 0
7 2 1 0 0 0 0
7 3 1 0 0 0 0
7 4 1 0 0 0 0
7 1 1 0 0 0 0
7 6 1 0 0 0 0
7 5 1 0 0 0 0
M ZBO 6 7 0 8 0 9 0 10 0 11 0 12 0
M END

View File

@@ -0,0 +1,44 @@
Mrv0541 11301306502D
13 24 0 0 0 0 999 V2000
5.0679 1.7384 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3534 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.3534 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.0679 0.0884 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.7823 0.5009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
5.7823 1.3259 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
2.8286 0.7955 0.0000 Cr 0 0 0 0 0 0 0 0 0 0 0 0
0.7366 1.7089 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0221 1.2964 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0221 0.4714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.7366 0.0589 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4511 0.4714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.4511 1.2964 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
2 3 2 0 0 0 0
3 4 1 0 0 0 0
4 5 2 0 0 0 0
5 6 1 0 0 0 0
1 6 2 0 0 0 0
7 2 1 0 0 0 0
7 3 1 0 0 0 0
7 4 1 0 0 0 0
7 1 1 0 0 0 0
7 6 1 0 0 0 0
7 5 1 0 0 0 0
8 9 1 0 0 0 0
9 10 2 0 0 0 0
10 11 1 0 0 0 0
11 12 2 0 0 0 0
12 13 1 0 0 0 0
8 13 2 0 0 0 0
13 7 1 0 0 0 0
12 7 1 0 0 0 0
11 7 1 0 0 0 0
10 7 1 0 0 0 0
9 7 1 0 0 0 0
8 7 1 0 0 0 0
M ZBO 6 7 0 8 0 9 0 10 0 11 0 12 0
M ZBO 6 19 0 20 0 21 0 22 0 23 0 24 0
M END

View File

@@ -0,0 +1,27 @@
Mrv0541 11301306102D
11 10 0 0 0 0 999 V2000
-0.3380 0.2050 0.0000 Fe 0 0 0 0 0 0 0 0 0 0 0 0
0.4870 0.2050 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.0524 -0.2075 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-1.0524 0.6175 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.3120 0.2050 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-1.7669 -0.6200 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-1.7669 1.0300 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.3380 1.0300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.3380 -0.6200 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
-0.3380 1.8550 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
-0.3380 -1.4450 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0 0 0 0
1 3 1 0 0 0 0
1 4 1 0 0 0 0
2 5 2 0 0 0 0
3 6 2 0 0 0 0
4 7 2 0 0 0 0
1 8 1 0 0 0 0
1 9 1 0 0 0 0
8 10 2 0 0 0 0
9 11 2 0 0 0 0
M ZBO 5 1 0 2 0 3 0 7 0 8 0
M END

View File

@@ -224,6 +224,7 @@ struct bond_wrapper {
.value("DATIVEL",Bond::DATIVEL)
.value("DATIVER",Bond::DATIVER)
.value("OTHER",Bond::OTHER)
.value("ZERO",Bond::ZERO)
;
python::enum_<Bond::BondDir>("BondDir")
.value("NONE",Bond::NONE)