Add 'k' extension to SMARTS to support ringsize queries (#9172)

* switch the Query infrastructure to use std::function

* add a mention to the release notes

* refactor makeAtomInRingOfSizeQuery() to use lambdas and support range queries

* add 'k' atom query to SMARTS

* changes in response to review
This commit is contained in:
Greg Landrum
2026-03-17 15:02:59 +01:00
committed by GitHub
parent 1a3cc2d5e8
commit 972b31e239
12 changed files with 921 additions and 613 deletions

View File

@@ -95,70 +95,27 @@ ATOM_EQUALS_QUERY *makeAtomRingBondCountQuery(int what) {
};
//! Builds an equality query on "atom is in a ring of size \c tgt".
/*!
  \param tgt  the ring size to look for; validated with RANGE_CHECK(3, tgt, 20)

  \return a newly allocated ATOM_EQUALS_QUERY (returned as a raw pointer, as
          before) whose target value is \c tgt and whose description is
          "AtomRingSize".

  The data function yields \c tgt when the atom is in a ring of that size and
  0 otherwise (see queryAtomIsInRingOfSize(at, tgt)).
*/
ATOM_EQUALS_QUERY *makeAtomInRingOfSizeQuery(int tgt) {
  RANGE_CHECK(3, tgt, 20);
  auto *res = new ATOM_EQUALS_QUERY;
  res->setVal(tgt);
  // The ring size is captured at runtime, so a single lambda replaces the
  // former per-size switch over queryAtomIsInRingOfSize<N>; any data function
  // installed by that switch was overwritten by this call anyway.
  res->setDataFunc(
      [tgt](Atom const *at) { return queryAtomIsInRingOfSize(at, tgt); });
  res->setDescription("AtomRingSize");
  return res;
}
//! Builds a range query on the size of the rings an atom belongs to.
/*!
  \param lower      lower end of the accepted ring-size range
  \param upper      upper end of the accepted ring-size range
  \param lowerOpen  forwarded to setEndsOpen() and to the data function
  \param upperOpen  forwarded to setEndsOpen() and to the data function

  \return a newly allocated ATOM_RANGE_QUERY described as "range_AtomRingSize"
*/
ATOM_RANGE_QUERY *makeAtomInRingOfSizeQuery(int lower, int upper,
                                            bool lowerOpen, bool upperOpen) {
  // data function: delegates to the range flavour of queryAtomIsInRingOfSize
  // with the bounds fixed at construction time
  auto ringSizeInRange = [lower, upper, lowerOpen,
                          upperOpen](Atom const *at) {
    return queryAtomIsInRingOfSize(at, lower, upper, lowerOpen, upperOpen);
  };

  auto *rangeQuery = new ATOM_RANGE_QUERY;
  rangeQuery->setLower(lower);
  rangeQuery->setUpper(upper);
  rangeQuery->setEndsOpen(lowerOpen, upperOpen);
  rangeQuery->setDataFunc(ringSizeInRange);
  rangeQuery->setDescription("range_AtomRingSize");
  return rangeQuery;
}
BOND_EQUALS_QUERY *makeBondInRingOfSizeQuery(int tgt) {
RANGE_CHECK(3, tgt, 20);
auto *res = new BOND_EQUALS_QUERY;
@@ -1046,28 +1003,70 @@ Atom *replaceAtomWithQueryAtom(RWMol *mol, Atom *atom) {
return mol->getAtomWithIdx(idx);
}
//! Flavour of comparison a query description encodes.
/*!
  finalizeQueryFromDescription() derives this from the description prefix:
  no prefix -> EQUAL, "less_" -> LESS, "greater_" -> GREATER,
  "range_" -> RANGE.
*/
enum class RangeQueryType : char {
  EQUAL = 0,    //!< plain equality query (no prefix)
  LESS = 1,     //!< description starts with "less_"
  GREATER = 2,  //!< description starts with "greater_"
  RANGE = 3,    //!< description starts with "range_"
};
//! Installs the ring-size data function on an "AtomRingSize" query.
/*!
  Called from finalizeQueryFromDescription() once the description (minus any
  "range_"/"less_"/"greater_" prefix) has been recognised as "AtomRingSize".
  The bounds are read back from the query object itself, so \c query must
  really be of the concrete type implied by \c qtype (it is static_cast
  without a check).

  \param query  the query to update
  \param qtype  which concrete query type \c query is:
                EQUAL -> ATOM_EQUALS_QUERY, RANGE -> ATOM_RANGE_QUERY,
                LESS -> ATOM_LESSEQUAL_QUERY, GREATER -> ATOM_GREATEREQUAL_QUERY

  \throws ValueErrorException if \c qtype is not one of the values above
*/
void finalizeAtomRingSizeQuery(Queries::Query<int, Atom const *, true> *query,
                               RangeQueryType qtype) {
  switch (qtype) {
    case RangeQueryType::EQUAL: {
      auto tgt = static_cast<ATOM_EQUALS_QUERY *>(query)->getVal();
      query->setDataFunc(
          [tgt](Atom const *at) { return queryAtomIsInRingOfSize(at, tgt); });
    } break;
    case RangeQueryType::RANGE: {
      auto rq = static_cast<ATOM_RANGE_QUERY *>(query);
      auto uv = rq->getUpper();
      auto lv = rq->getLower();
      auto [lo, uo] = rq->getEndsOpen();
      // Structured bindings cannot be named directly in a capture list before
      // C++20 (and older Clang rejects it), so copy them via init-captures.
      query->setDataFunc(
          [lv, uv, lowerOpen = lo, upperOpen = uo](Atom const *at) {
            return queryAtomIsInRingOfSize(at, lv, uv, lowerOpen, upperOpen);
          });
    } break;
    case RangeQueryType::LESS: {
      // the stored value is the lower bound; -1 leaves the upper end unlimited
      auto lv = static_cast<ATOM_LESSEQUAL_QUERY *>(query)->getVal();
      auto uv = -1;
      query->setDataFunc([lv, uv](Atom const *at) {
        return queryAtomIsInRingOfSize(at, lv, uv);
      });
    } break;
    case RangeQueryType::GREATER: {
      // the stored value is the upper bound; -1 leaves the lower end unlimited
      auto lv = -1;
      auto uv = static_cast<ATOM_GREATEREQUAL_QUERY *>(query)->getVal();
      query->setDataFunc([lv, uv](Atom const *at) {
        return queryAtomIsInRingOfSize(at, lv, uv);
      });
    } break;
    default:
      throw ValueErrorException("bad range query type");
  }
}
void finalizeQueryFromDescription(
Queries::Query<int, Atom const *, true> *query, Atom const *) {
std::string descr = query->getDescription();
RangeQueryType qtype = RangeQueryType::EQUAL;
if (boost::starts_with(descr, "range_")) {
descr = descr.substr(6);
qtype = RangeQueryType::RANGE;
} else if (boost::starts_with(descr, "less_")) {
descr = descr.substr(5);
qtype = RangeQueryType::LESS;
} else if (boost::starts_with(descr, "greater_")) {
descr = descr.substr(8);
qtype = RangeQueryType::GREATER;
}
Queries::Query<int, Atom const *, true> *tmpQuery;
if (descr == "AtomRingBondCount") {
query->setDataFunc(queryAtomRingBondCount);
} else if (descr == "AtomHasRingBond") {
query->setDataFunc(queryAtomHasRingBond);
} else if (descr == "AtomRingSize") {
tmpQuery = makeAtomInRingOfSizeQuery(
static_cast<ATOM_EQUALS_QUERY *>(query)->getVal());
query->setDataFunc(tmpQuery->getDataFunc());
delete tmpQuery;
finalizeAtomRingSizeQuery(query, qtype);
} else if (descr == "AtomMinRingSize") {
query->setDataFunc(queryAtomMinRingSize);
} else if (descr == "AtomImplicitValence") {

View File

@@ -24,6 +24,9 @@
#include <DataStructs/BitOps.h>
#include <functional>
#include <functional>
#include <limits>
#ifdef RDK_BUILD_THREADSAFE_SSS
#include <mutex>
#include <utility>
@@ -322,6 +325,38 @@ static inline int queryAtomRingBondCount(Atom const *at) {
return res;
}
//! returns \c tgt if the atom is in a ring of size \c tgt according to the
//! owning molecule's RingInfo, and 0 otherwise
static inline int queryAtomIsInRingOfSize(Atom const *at, int tgt) {
  const bool inRingOfSize =
      at->getOwningMol().getRingInfo()->isAtomInRingOfSize(at->getIdx(), tgt);
  return inRingOfSize ? tgt : 0;
}
//! returns the size of the first SSSR ring containing the atom whose size
//! falls inside the requested range
/*!
  \param at         the atom to examine
  \param lower      lower bound on the ring size; -1 accepts every size from
                    below
  \param upper      upper bound on the ring size; any negative value disables
                    the upper limit
  \param lowerOpen  if true, a ring of exactly \c lower members is rejected
  \param upperOpen  if true, a ring of exactly \c upper members is rejected

  \return the matching ring size, or, when no ring qualifies (including atoms
          that are in no ring at all), a sentinel chosen to lie outside the
          range: -1 if \c lower is set (> -1), otherwise
          std::numeric_limits<int>::max() if \c upper is set (> -1),
          otherwise 0.
*/
static inline int queryAtomIsInRingOfSize(Atom const *at, int lower, int upper,
                                          bool lowerOpen = false,
                                          bool upperOpen = false) {
  const auto ringInfo = at->getOwningMol().getRingInfo();
  for (const auto sz : ringInfo->atomRingSizes(at->getIdx())) {
    // below the lower bound, or sitting on an excluded lower end point
    if (sz < lower || (sz == lower && lowerOpen)) {
      continue;
    }
    // above the upper bound, or sitting on an excluded upper end point;
    // skipped entirely when there is no upper limit
    if (upper >= 0 && (sz > upper || (sz == upper && upperOpen))) {
      continue;
    }
    return sz;
  }

  // nothing matched: hand back a value that cannot satisfy the range
  if (lower > -1) {
    return -1;
  }
  if (upper > -1) {
    return std::numeric_limits<int>::max();
  }
  return 0;
}
template <int tgt>
int queryAtomIsInRingOfSize(Atom const *at) {
if (at->getOwningMol().getRingInfo()->isAtomInRingOfSize(at->getIdx(), tgt)) {
@@ -562,7 +597,13 @@ T *makeAtomInNRingsQuery(int what, const std::string &descr) {
RDKIT_GRAPHMOL_EXPORT ATOM_EQUALS_QUERY *makeAtomInNRingsQuery(int what);
//! returns a Query for matching atoms in rings of a particular size
//! (\c tgt is the ring size, \c descr the description given to the query)
template <class T>
T *makeAtomInRingOfSizeQuery(int tgt, const std::string &descr);
//! \overload
//! Returns an equality query for atoms in a ring of exactly \c tgt members.
RDKIT_GRAPHMOL_EXPORT ATOM_EQUALS_QUERY *makeAtomInRingOfSizeQuery(int tgt);
//! \overload
//! Returns a range query for atoms in a ring whose size lies between
//! \c lower and \c upper. Setting \c lowerOpen / \c upperOpen excludes the
//! corresponding end point; by default both end points are included.
RDKIT_GRAPHMOL_EXPORT ATOM_RANGE_QUERY *makeAtomInRingOfSizeQuery(
int lower, int upper, bool lowerOpen = false, bool upperOpen = false);
//! returns a Query for matching an atom's minimum ring size
template <class T>

View File

@@ -180,6 +180,10 @@ std::string getAtomSmartsSimple(const QueryAtom *qatom,
res << "r";
hasVal = true;
needParen = true;
} else if (descrip == "AtomRingSize") {
res << "k";
hasVal = true;
needParen = true;
} else if (descrip == "AtomInNRings") {
res << "R";
if (mods == Modifiers::NONE && equery && equery->getVal() >= 0) {

View File

@@ -558,8 +558,8 @@ static void yynoreturn yy_fatal_error ( const char* msg , yyscan_t yyscanner );
yyg->yy_hold_char = *yy_cp; \
*yy_cp = '\0'; \
yyg->yy_c_buf_p = yy_cp;
#define YY_NUM_RULES 183
#define YY_END_OF_BUFFER 184
#define YY_NUM_RULES 184
#define YY_END_OF_BUFFER 185
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@@ -567,32 +567,33 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
static const flex_int16_t yy_accept[221] =
static const flex_int16_t yy_accept[222] =
{ 0,
0, 0, 0, 0, 0, 0, 0, 0, 184, 182,
181, 171, 148, 151, 168, 173, 159, 143, 157, 174,
156, 167, 153, 169, 170, 146, 172, 182, 149, 6,
145, 123, 124, 127, 122, 132, 125, 126, 128, 129,
164, 152, 166, 182, 147, 144, 133, 134, 135, 136,
137, 138, 162, 163, 150, 151, 145, 123, 124, 112,
182, 127, 182, 122, 132, 16, 182, 182, 125, 126,
128, 120, 129, 182, 87, 20, 69, 114, 35, 118,
165, 144, 113, 119, 121, 138, 182, 116, 115, 117,
160, 161, 154, 155, 0, 0, 0, 0, 0, 131,
0, 0, 0, 0, 0, 0, 0, 0, 185, 183,
182, 172, 149, 152, 169, 174, 160, 144, 158, 175,
157, 168, 154, 170, 171, 147, 173, 183, 150, 6,
146, 124, 125, 128, 123, 133, 126, 127, 129, 130,
165, 153, 167, 183, 148, 145, 134, 135, 136, 137,
138, 139, 163, 164, 151, 152, 146, 124, 125, 112,
183, 128, 183, 123, 133, 16, 183, 183, 126, 127,
129, 120, 130, 183, 87, 20, 69, 114, 35, 118,
166, 145, 113, 119, 122, 121, 139, 183, 116, 115,
117, 161, 162, 155, 156, 0, 0, 0, 0, 0,
130, 152, 175, 176, 177, 178, 179, 180, 158, 84,
43, 13, 90, 15, 30, 80, 74, 51, 9, 102,
78, 92, 17, 44, 53, 93, 91, 107, 23, 21,
50, 26, 100, 105, 61, 63, 94, 58, 24, 109,
95, 82, 28, 59, 29, 7, 67, 75, 62, 103,
45, 72, 32, 52, 8, 98, 66, 111, 96, 12,
22, 38, 104, 11, 37, 55, 10, 25, 97, 88,
71, 86, 77, 42, 56, 79, 54, 73, 89, 83,
33, 70, 99, 106, 41, 81, 40, 47, 18, 31,
101, 14, 57, 46, 34, 68, 60, 39, 48, 85,
132, 131, 153, 176, 177, 178, 179, 180, 181, 159,
84, 43, 13, 90, 15, 30, 80, 74, 51, 9,
102, 78, 92, 17, 44, 53, 93, 91, 107, 23,
21, 50, 26, 100, 105, 61, 63, 94, 58, 24,
109, 95, 82, 28, 59, 29, 7, 67, 75, 62,
103, 45, 72, 32, 52, 8, 98, 66, 111, 96,
12, 22, 38, 104, 11, 37, 55, 10, 25, 97,
88, 71, 86, 77, 42, 56, 79, 54, 73, 89,
83, 33, 70, 99, 106, 41, 81, 40, 47, 18,
31, 101, 14, 57, 46, 34, 68, 60, 39, 48,
19, 76, 64, 0, 49, 65, 27, 36, 140, 141,
139, 142, 2, 5, 3, 4, 1, 110, 108, 0
85, 19, 76, 64, 0, 49, 65, 27, 36, 141,
142, 140, 143, 2, 5, 3, 4, 1, 110, 108,
0
} ;
static const YY_CHAR yy_ec[256] =
@@ -640,18 +641,18 @@ static const YY_CHAR yy_meta[85] =
1, 1, 1, 1
} ;
static const flex_int16_t yy_base[221] =
static const flex_int16_t yy_base[222] =
{ 0,
0, 0, 79, 0, 217, 215, 213, 211, 220, 278,
0, 0, 79, 0, 222, 217, 215, 213, 221, 278,
278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
189, 278, 278, 278, 278, 278, 278, 202, 278, 103,
278, 141, 141, 278, 278, 278, 278, 278, 278, 278,
278, 149, 278, 69, 278, 278, 278, 278, 278, 278,
278, 278, 278, 278, 278, 192, 98, 33, 107, 76,
20, 74, 102, 122, 31, 126, 121, 131, 146, 116,
152, 171, 180, 196, 113, 278, 278, 120, 102, 68,
278, 77, 278, 278, 278, 41, 84, 278, 278, 278,
278, 278, 278, 278, 230, 63, 61, 49, 103, 278,
192, 278, 278, 278, 278, 278, 278, 203, 278, 103,
278, 142, 146, 278, 278, 278, 278, 278, 278, 278,
278, 155, 278, 69, 278, 278, 278, 278, 278, 278,
278, 278, 278, 278, 278, 195, 98, 33, 107, 76,
20, 74, 102, 122, 31, 127, 121, 131, 146, 125,
152, 171, 180, 196, 114, 278, 278, 127, 123, 68,
278, 87, 278, 278, 278, 278, 41, 89, 278, 278,
278, 278, 278, 278, 278, 230, 63, 61, 49, 103,
278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
@@ -664,36 +665,38 @@ static const flex_int16_t yy_base[221] =
278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
278, 278, 278, 116, 278, 278, 278, 278, 278, 278,
278, 278, 278, 278, 278, 278, 278, 278, 278, 278
278, 278, 278, 278, 116, 278, 278, 278, 278, 278,
278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
278
} ;
static const flex_int16_t yy_def[221] =
static const flex_int16_t yy_def[222] =
{ 0,
220, 1, 1, 3, 1, 1, 1, 1, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
221, 1, 1, 3, 1, 1, 1, 1, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 0
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
0
} ;
static const flex_int16_t yy_nxt[363] =
@@ -706,38 +709,38 @@ static const flex_int16_t yy_nxt[363] =
10, 10, 10, 41, 42, 43, 44, 45, 46, 47,
48, 10, 10, 10, 10, 10, 10, 10, 10, 10,
49, 50, 51, 10, 52, 10, 10, 10, 10, 10,
10, 53, 54, 55, 56, 103, 104, 105, 106, 107,
108, 118, 215, 136, 137, 119, 138, 214, 120, 121,
10, 53, 54, 55, 56, 104, 105, 106, 107, 108,
109, 119, 216, 137, 138, 120, 139, 215, 121, 122,
122, 151, 213, 210, 152, 95, 100, 211, 57, 58,
123, 152, 214, 211, 153, 96, 101, 212, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
69, 70, 71, 72, 73, 74, 75, 76, 77, 78,
79, 80, 96, 216, 81, 133, 139, 82, 207, 217,
83, 208, 140, 141, 84, 97, 212, 142, 98, 99,
134, 209, 85, 86, 87, 135, 88, 89, 110, 90,
143, 206, 111, 144, 145, 123, 112, 113, 124, 125,
126, 114, 115, 116, 117, 101, 127, 128, 129, 154,
130, 131, 205, 132, 146, 147, 148, 155, 218, 204,
171, 219, 159, 149, 156, 160, 150, 157, 158, 153,
79, 80, 97, 217, 81, 134, 140, 82, 208, 218,
83, 209, 141, 142, 84, 98, 85, 143, 99, 100,
135, 213, 86, 87, 88, 136, 89, 90, 111, 91,
144, 210, 112, 145, 146, 124, 113, 114, 125, 126,
127, 115, 116, 117, 118, 102, 128, 129, 130, 155,
131, 132, 207, 133, 147, 148, 149, 156, 219, 206,
205, 220, 160, 150, 157, 161, 151, 158, 159, 172,
109, 161, 162, 102, 164, 165, 163, 166, 167, 101,
172, 173, 168, 174, 100, 94, 93, 169, 170, 220,
92, 175, 92, 176, 91, 177, 91, 178, 179, 180,
181, 220, 95, 182, 183, 184, 185, 220, 220, 188,
189, 186, 190, 220, 191, 220, 192, 187, 220, 193,
194, 220, 220, 195, 196, 197, 198, 220, 199, 96,
220, 200, 201, 220, 202, 203, 220, 220, 220, 220,
220, 220, 97, 220, 220, 98, 99, 9, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
154, 162, 163, 110, 165, 166, 164, 167, 168, 103,
173, 174, 169, 175, 102, 101, 95, 170, 171, 94,
221, 176, 93, 177, 93, 178, 92, 179, 180, 181,
182, 92, 96, 183, 184, 185, 186, 221, 221, 189,
190, 187, 191, 221, 192, 221, 193, 188, 221, 194,
195, 221, 221, 196, 197, 198, 199, 221, 200, 97,
221, 201, 202, 221, 203, 204, 221, 221, 221, 221,
221, 221, 98, 221, 221, 99, 100, 9, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221
} ;
static const flex_int16_t yy_chk[363] =
@@ -751,37 +754,37 @@ static const flex_int16_t yy_chk[363] =
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 3, 44, 44, 44, 44, 44,
44, 58, 98, 61, 61, 58, 61, 97, 58, 58,
44, 58, 99, 61, 61, 58, 61, 98, 58, 58,
58, 65, 96, 86, 65, 30, 58, 86, 3, 3,
58, 65, 97, 87, 65, 30, 58, 87, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 30, 99, 3, 60, 62, 3, 80, 99,
3, 80, 62, 62, 3, 30, 87, 62, 30, 30,
60, 82, 3, 3, 3, 60, 3, 3, 57, 3,
63, 79, 57, 63, 63, 59, 57, 57, 59, 59,
3, 3, 30, 100, 3, 60, 62, 3, 80, 100,
3, 80, 62, 62, 3, 30, 3, 62, 30, 30,
60, 88, 3, 3, 3, 60, 3, 3, 57, 3,
63, 82, 57, 63, 63, 59, 57, 57, 59, 59,
59, 57, 57, 57, 57, 59, 59, 59, 59, 67,
59, 59, 78, 59, 64, 64, 64, 67, 204, 75,
70, 204, 68, 64, 67, 68, 64, 67, 67, 66,
59, 59, 79, 59, 64, 64, 64, 67, 205, 78,
75, 205, 68, 64, 67, 68, 64, 67, 67, 70,
56, 68, 68, 42, 69, 69, 68, 69, 69, 33,
71, 71, 69, 71, 32, 28, 21, 69, 69, 9,
8, 71, 7, 71, 6, 71, 5, 71, 71, 72,
72, 0, 95, 72, 72, 72, 72, 0, 0, 73,
66, 68, 68, 56, 69, 69, 68, 69, 69, 42,
71, 71, 69, 71, 33, 32, 28, 69, 69, 21,
9, 71, 8, 71, 7, 71, 6, 71, 71, 72,
72, 5, 96, 72, 72, 72, 72, 0, 0, 73,
73, 72, 73, 0, 73, 0, 73, 72, 0, 73,
73, 0, 0, 73, 74, 74, 74, 0, 74, 95,
73, 0, 0, 73, 74, 74, 74, 0, 74, 96,
0, 74, 74, 0, 74, 74, 0, 0, 0, 0,
0, 0, 95, 0, 0, 95, 95, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
0, 0, 96, 0, 0, 96, 96, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220, 220, 220, 220, 220, 220, 220, 220, 220,
220, 220
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221, 221, 221, 221, 221, 221, 221, 221, 221,
221, 221
} ;
/* The intent behind this definition is that it'll catch
@@ -837,16 +840,17 @@ size_t setup_smarts_string(const std::string &text,yyscan_t yyscanner){
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
buf = (char *) yysmarts_alloc(n ,yyscanner );
if ( ! buf )
if ( ! buf ) {
smarts_lexer_error( "out of dynamic memory in yysmarts__scan_bytes()" );
}
// ltrim
for(start = 0 ; start < _yybytes_len; ++start) {
if (yybytes[start] > 32) break;
if (yybytes[start] > 32) { break; }
}
for(end = _yybytes_len ; end > start; --end) {
if (yybytes[end] > 32) break;
if (yybytes[end] > 32) { break; }
}
_yybytes_len = end-start+1;
@@ -856,8 +860,9 @@ size_t setup_smarts_string(const std::string &text,yyscan_t yyscanner){
buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
b = yysmarts__scan_buffer(buf,n ,yyscanner);
if ( ! b )
if ( ! b ) {
smarts_lexer_error( "bad buffer in yysmarts__scan_bytes()" );
}
/* It's okay to grow etc. this buffer, and we should throw it
* away when we're done.
@@ -1183,13 +1188,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
if ( yy_current_state >= 221 )
if ( yy_current_state >= 222 )
yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
++yy_cp;
}
while ( yy_current_state != 220 );
while ( yy_current_state != 221 );
yy_cp = yyg->yy_last_accepting_cpos;
yy_current_state = yyg->yy_last_accepting_state;
@@ -1420,102 +1425,110 @@ YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
yylval->atom->setQuery(makeAtomInRingQuery());
return RINGSIZE_ATOM_QUERY_TOKEN;
return MIN_RINGSIZE_ATOM_QUERY_TOKEN;
}
YY_BREAK
case 122:
YY_RULE_SETUP
{ return H_TOKEN; }
{
yylval->atom = new QueryAtom();
yylval->atom->setQuery(makeAtomInRingQuery());
return RINGSIZE_ATOM_QUERY_TOKEN;
}
YY_BREAK
case 123:
YY_RULE_SETUP
{ yylval->ival = 5; return ORGANIC_ATOM_TOKEN; }
{ return H_TOKEN; }
YY_BREAK
case 124:
YY_RULE_SETUP
{ yylval->ival = 6; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 5; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 125:
YY_RULE_SETUP
{ yylval->ival = 7; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 6; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 126:
YY_RULE_SETUP
{ yylval->ival = 8; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 7; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 127:
YY_RULE_SETUP
{ yylval->ival = 9; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 8; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 128:
YY_RULE_SETUP
{ yylval->ival = 15; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 9; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 129:
YY_RULE_SETUP
{ yylval->ival = 16; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 15; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 130:
YY_RULE_SETUP
{ yylval->ival = 17; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 16; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 131:
YY_RULE_SETUP
{ yylval->ival = 35; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 17; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 132:
YY_RULE_SETUP
{ yylval->ival = 53; return ORGANIC_ATOM_TOKEN; }
{ yylval->ival = 35; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 133:
YY_RULE_SETUP
{ yylval->ival = 5; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 53; return ORGANIC_ATOM_TOKEN; }
YY_BREAK
case 134:
YY_RULE_SETUP
{ yylval->ival = 6; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 5; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 135:
YY_RULE_SETUP
{ yylval->ival = 7; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 6; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 136:
YY_RULE_SETUP
{ yylval->ival = 8; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 7; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 137:
YY_RULE_SETUP
{ yylval->ival = 15; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 8; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 138:
YY_RULE_SETUP
{ yylval->ival = 16; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 15; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 139:
YY_RULE_SETUP
{ yylval->ival = 14; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 16; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 140:
YY_RULE_SETUP
{ yylval->ival = 33; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 14; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 141:
YY_RULE_SETUP
{ yylval->ival = 34; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 33; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 142:
YY_RULE_SETUP
{ yylval->ival = 52; return AROMATIC_ATOM_TOKEN; }
{ yylval->ival = 34; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 143:
YY_RULE_SETUP
{ yylval->ival = 52; return AROMATIC_ATOM_TOKEN; }
YY_BREAK
case 144:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
yylval->atom->setQuery(makeAtomNullQuery());
return SIMPLE_ATOM_QUERY_TOKEN;
}
YY_BREAK
case 144:
case 145:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1524,7 +1537,7 @@ YY_RULE_SETUP
return SIMPLE_ATOM_QUERY_TOKEN;
}
YY_BREAK
case 145:
case 146:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1532,105 +1545,105 @@ YY_RULE_SETUP
return SIMPLE_ATOM_QUERY_TOKEN;
}
YY_BREAK
case 146:
case 147:
YY_RULE_SETUP
{ return COLON_TOKEN; }
YY_BREAK
case 147:
case 148:
YY_RULE_SETUP
{ return UNDERSCORE_TOKEN; }
YY_BREAK
case 148:
case 149:
YY_RULE_SETUP
{ return HASH_TOKEN; }
YY_BREAK
case 149:
case 150:
YY_RULE_SETUP
{ yylval->bond = new QueryBond(Bond::DOUBLE);
yylval->bond->setQuery(makeBondOrderEqualsQuery(Bond::DOUBLE));
return BOND_TOKEN; }
YY_BREAK
case 150:
case 151:
YY_RULE_SETUP
{ yylval->bond = new QueryBond();
yylval->bond->setQuery(makeBondNullQuery());
return BOND_TOKEN; }
YY_BREAK
case 151:
case 152:
YY_RULE_SETUP
{ yylval->bond = new QueryBond(Bond::QUADRUPLE);
yylval->bond->setQuery(makeBondOrderEqualsQuery(Bond::QUADRUPLE));
return BOND_TOKEN; }
YY_BREAK
case 152:
case 153:
YY_RULE_SETUP
{ yylval->bond = new QueryBond(Bond::SINGLE);
yylval->bond->setBondDir(Bond::ENDDOWNRIGHT);
yylval->bond->setQuery(makeSingleOrAromaticBondQuery());
return BOND_TOKEN; }
YY_BREAK
case 153:
case 154:
YY_RULE_SETUP
{ yylval->bond = new QueryBond(Bond::SINGLE);
yylval->bond->setBondDir(Bond::ENDUPRIGHT);
yylval->bond->setQuery(makeSingleOrAromaticBondQuery());
return BOND_TOKEN; }
YY_BREAK
case 154:
case 155:
YY_RULE_SETUP
{
yylval->bond = new QueryBond(Bond::DATIVER);
return BOND_TOKEN;
}
YY_BREAK
case 155:
case 156:
YY_RULE_SETUP
{
yylval->bond = new QueryBond(Bond::DATIVEL);
return BOND_TOKEN;
}
YY_BREAK
case 156:
case 157:
YY_RULE_SETUP
{ return MINUS_TOKEN; }
YY_BREAK
case 157:
case 158:
YY_RULE_SETUP
{ return PLUS_TOKEN; }
YY_BREAK
case 158:
case 159:
YY_RULE_SETUP
{ yy_push_state(IN_RECURSION_STATE,yyscanner); return BEGIN_RECURSE; }
YY_BREAK
case 159:
case 160:
YY_RULE_SETUP
{ yy_push_state(IN_BRANCH_STATE,yyscanner); return GROUP_OPEN_TOKEN; }
YY_BREAK
case 160:
case 161:
YY_RULE_SETUP
{ yy_pop_state(yyscanner); return GROUP_CLOSE_TOKEN; }
YY_BREAK
case 161:
case 162:
YY_RULE_SETUP
{ yy_pop_state(yyscanner); return END_RECURSE; }
YY_BREAK
case 162:
case 163:
YY_RULE_SETUP
{ return RANGE_OPEN_TOKEN; }
YY_BREAK
case 163:
case 164:
YY_RULE_SETUP
{ return RANGE_CLOSE_TOKEN; }
YY_BREAK
case 164:
case 165:
YY_RULE_SETUP
{ yy_push_state(IN_ATOM_STATE,yyscanner); return ATOM_OPEN_TOKEN; }
YY_BREAK
case 165:
case 166:
YY_RULE_SETUP
{ yy_pop_state(yyscanner); return ATOM_CLOSE_TOKEN; }
YY_BREAK
case 166:
case 167:
YY_RULE_SETUP
{ /* FIX: ???
This rule is here because otherwise recursive SMARTS queries like:
@@ -1641,39 +1654,39 @@ YY_RULE_SETUP
*/
return ATOM_CLOSE_TOKEN; }
YY_BREAK
case 167:
case 168:
YY_RULE_SETUP
{ return SEPARATOR_TOKEN; }
YY_BREAK
case 168:
case 169:
YY_RULE_SETUP
{ return PERCENT_TOKEN; }
YY_BREAK
case 169:
case 170:
YY_RULE_SETUP
{ yylval->ival = 0; return ZERO_TOKEN; }
YY_BREAK
case 170:
case 171:
YY_RULE_SETUP
{ yylval->ival = yytext[0]-'0'; return NONZERO_DIGIT_TOKEN; }
YY_BREAK
case 171:
case 172:
YY_RULE_SETUP
{ return NOT_TOKEN; }
YY_BREAK
case 172:
case 173:
YY_RULE_SETUP
{ return SEMI_TOKEN; }
YY_BREAK
case 173:
case 174:
YY_RULE_SETUP
{ return AND_TOKEN; }
YY_BREAK
case 174:
case 175:
YY_RULE_SETUP
{ return OR_TOKEN; }
YY_BREAK
case 175:
case 176:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1681,7 +1694,7 @@ YY_RULE_SETUP
return HYB_TOKEN;
}
YY_BREAK
case 176:
case 177:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1689,7 +1702,7 @@ YY_RULE_SETUP
return HYB_TOKEN;
}
YY_BREAK
case 177:
case 178:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1697,7 +1710,7 @@ YY_RULE_SETUP
return HYB_TOKEN;
}
YY_BREAK
case 178:
case 179:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1705,7 +1718,7 @@ YY_RULE_SETUP
return HYB_TOKEN;
}
YY_BREAK
case 179:
case 180:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1713,7 +1726,7 @@ YY_RULE_SETUP
return HYB_TOKEN;
}
YY_BREAK
case 180:
case 181:
YY_RULE_SETUP
{
yylval->atom = new QueryAtom();
@@ -1721,8 +1734,8 @@ YY_RULE_SETUP
return HYB_TOKEN;
}
YY_BREAK
case 181:
/* rule 181 can match eol */
case 182:
/* rule 182 can match eol */
YY_RULE_SETUP
return EOS_TOKEN;
YY_BREAK
@@ -1732,11 +1745,11 @@ case YY_STATE_EOF(IN_BRANCH_STATE):
case YY_STATE_EOF(IN_RECURSION_STATE):
{ return EOS_TOKEN; }
YY_BREAK
case 182:
case 183:
YY_RULE_SETUP
return BAD_CHARACTER;
YY_BREAK
case 183:
case 184:
YY_RULE_SETUP
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
@@ -2037,7 +2050,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
if ( yy_current_state >= 221 )
if ( yy_current_state >= 222 )
yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
@@ -2066,11 +2079,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
if ( yy_current_state >= 221 )
if ( yy_current_state >= 222 )
yy_c = yy_meta[yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
yy_is_jam = (yy_current_state == 220);
yy_is_jam = (yy_current_state == 221);
(void)yyg;
return yy_is_jam ? 0 : yy_current_state;

View File

@@ -277,9 +277,14 @@ size_t setup_smarts_string(const std::string &text,yyscan_t yyscanner){
<IN_ATOM_STATE>r {
yylval->atom = new QueryAtom();
yylval->atom->setQuery(makeAtomInRingQuery());
return RINGSIZE_ATOM_QUERY_TOKEN;
return MIN_RINGSIZE_ATOM_QUERY_TOKEN;
}
<IN_ATOM_STATE>k {
/* 'k' atom primitive (ring-size query). The atom starts out with the generic
   "in a ring" query; the grammar swaps in makeAtomInRingOfSizeQuery() when a
   number follows the token. */
yylval->atom = new QueryAtom();
yylval->atom->setQuery(makeAtomInRingQuery());
return RINGSIZE_ATOM_QUERY_TOKEN;
}
H { return H_TOKEN; }

File diff suppressed because it is too large Load Diff

View File

@@ -62,40 +62,41 @@ extern int yysmarts_debug;
ATOM_TOKEN = 263, /* ATOM_TOKEN */
SIMPLE_ATOM_QUERY_TOKEN = 264, /* SIMPLE_ATOM_QUERY_TOKEN */
COMPLEX_ATOM_QUERY_TOKEN = 265, /* COMPLEX_ATOM_QUERY_TOKEN */
RINGSIZE_ATOM_QUERY_TOKEN = 266, /* RINGSIZE_ATOM_QUERY_TOKEN */
RINGBOND_ATOM_QUERY_TOKEN = 267, /* RINGBOND_ATOM_QUERY_TOKEN */
IMPLICIT_H_ATOM_QUERY_TOKEN = 268, /* IMPLICIT_H_ATOM_QUERY_TOKEN */
HYB_TOKEN = 269, /* HYB_TOKEN */
HETERONEIGHBOR_ATOM_QUERY_TOKEN = 270, /* HETERONEIGHBOR_ATOM_QUERY_TOKEN */
ALIPHATIC = 271, /* ALIPHATIC */
ALIPHATICHETERONEIGHBOR_ATOM_QUERY_TOKEN = 272, /* ALIPHATICHETERONEIGHBOR_ATOM_QUERY_TOKEN */
ZERO_TOKEN = 273, /* ZERO_TOKEN */
NONZERO_DIGIT_TOKEN = 274, /* NONZERO_DIGIT_TOKEN */
GROUP_OPEN_TOKEN = 275, /* GROUP_OPEN_TOKEN */
GROUP_CLOSE_TOKEN = 276, /* GROUP_CLOSE_TOKEN */
SEPARATOR_TOKEN = 277, /* SEPARATOR_TOKEN */
RANGE_OPEN_TOKEN = 278, /* RANGE_OPEN_TOKEN */
RANGE_CLOSE_TOKEN = 279, /* RANGE_CLOSE_TOKEN */
HASH_TOKEN = 280, /* HASH_TOKEN */
MINUS_TOKEN = 281, /* MINUS_TOKEN */
PLUS_TOKEN = 282, /* PLUS_TOKEN */
H_TOKEN = 283, /* H_TOKEN */
AT_TOKEN = 284, /* AT_TOKEN */
PERCENT_TOKEN = 285, /* PERCENT_TOKEN */
ATOM_OPEN_TOKEN = 286, /* ATOM_OPEN_TOKEN */
ATOM_CLOSE_TOKEN = 287, /* ATOM_CLOSE_TOKEN */
NOT_TOKEN = 288, /* NOT_TOKEN */
AND_TOKEN = 289, /* AND_TOKEN */
OR_TOKEN = 290, /* OR_TOKEN */
SEMI_TOKEN = 291, /* SEMI_TOKEN */
BEGIN_RECURSE = 292, /* BEGIN_RECURSE */
END_RECURSE = 293, /* END_RECURSE */
COLON_TOKEN = 294, /* COLON_TOKEN */
UNDERSCORE_TOKEN = 295, /* UNDERSCORE_TOKEN */
BOND_TOKEN = 296, /* BOND_TOKEN */
CHI_CLASS_TOKEN = 297, /* CHI_CLASS_TOKEN */
BAD_CHARACTER = 298, /* BAD_CHARACTER */
EOS_TOKEN = 299 /* EOS_TOKEN */
MIN_RINGSIZE_ATOM_QUERY_TOKEN = 266, /* MIN_RINGSIZE_ATOM_QUERY_TOKEN */
RINGSIZE_ATOM_QUERY_TOKEN = 267, /* RINGSIZE_ATOM_QUERY_TOKEN */
RINGBOND_ATOM_QUERY_TOKEN = 268, /* RINGBOND_ATOM_QUERY_TOKEN */
IMPLICIT_H_ATOM_QUERY_TOKEN = 269, /* IMPLICIT_H_ATOM_QUERY_TOKEN */
HYB_TOKEN = 270, /* HYB_TOKEN */
HETERONEIGHBOR_ATOM_QUERY_TOKEN = 271, /* HETERONEIGHBOR_ATOM_QUERY_TOKEN */
ALIPHATIC = 272, /* ALIPHATIC */
ALIPHATICHETERONEIGHBOR_ATOM_QUERY_TOKEN = 273, /* ALIPHATICHETERONEIGHBOR_ATOM_QUERY_TOKEN */
ZERO_TOKEN = 274, /* ZERO_TOKEN */
NONZERO_DIGIT_TOKEN = 275, /* NONZERO_DIGIT_TOKEN */
GROUP_OPEN_TOKEN = 276, /* GROUP_OPEN_TOKEN */
GROUP_CLOSE_TOKEN = 277, /* GROUP_CLOSE_TOKEN */
SEPARATOR_TOKEN = 278, /* SEPARATOR_TOKEN */
RANGE_OPEN_TOKEN = 279, /* RANGE_OPEN_TOKEN */
RANGE_CLOSE_TOKEN = 280, /* RANGE_CLOSE_TOKEN */
HASH_TOKEN = 281, /* HASH_TOKEN */
MINUS_TOKEN = 282, /* MINUS_TOKEN */
PLUS_TOKEN = 283, /* PLUS_TOKEN */
H_TOKEN = 284, /* H_TOKEN */
AT_TOKEN = 285, /* AT_TOKEN */
PERCENT_TOKEN = 286, /* PERCENT_TOKEN */
ATOM_OPEN_TOKEN = 287, /* ATOM_OPEN_TOKEN */
ATOM_CLOSE_TOKEN = 288, /* ATOM_CLOSE_TOKEN */
NOT_TOKEN = 289, /* NOT_TOKEN */
AND_TOKEN = 290, /* AND_TOKEN */
OR_TOKEN = 291, /* OR_TOKEN */
SEMI_TOKEN = 292, /* SEMI_TOKEN */
BEGIN_RECURSE = 293, /* BEGIN_RECURSE */
END_RECURSE = 294, /* END_RECURSE */
COLON_TOKEN = 295, /* COLON_TOKEN */
UNDERSCORE_TOKEN = 296, /* UNDERSCORE_TOKEN */
BOND_TOKEN = 297, /* BOND_TOKEN */
CHI_CLASS_TOKEN = 298, /* CHI_CLASS_TOKEN */
BAD_CHARACTER = 299, /* BAD_CHARACTER */
EOS_TOKEN = 300 /* EOS_TOKEN */
};
typedef enum yytokentype yytoken_kind_t;
#endif

View File

@@ -153,7 +153,7 @@ yysmarts_error( const char *input,
%token <ival> AROMATIC_ATOM_TOKEN ORGANIC_ATOM_TOKEN
%token <atom> ATOM_TOKEN
%token <atom> SIMPLE_ATOM_QUERY_TOKEN COMPLEX_ATOM_QUERY_TOKEN
%token <atom> RINGSIZE_ATOM_QUERY_TOKEN RINGBOND_ATOM_QUERY_TOKEN IMPLICIT_H_ATOM_QUERY_TOKEN
%token <atom> MIN_RINGSIZE_ATOM_QUERY_TOKEN RINGSIZE_ATOM_QUERY_TOKEN RINGBOND_ATOM_QUERY_TOKEN IMPLICIT_H_ATOM_QUERY_TOKEN
%token <atom> HYB_TOKEN HETERONEIGHBOR_ATOM_QUERY_TOKEN ALIPHATIC ALIPHATICHETERONEIGHBOR_ATOM_QUERY_TOKEN
%token <ival> ZERO_TOKEN NONZERO_DIGIT_TOKEN
%token GROUP_OPEN_TOKEN GROUP_CLOSE_TOKEN SEPARATOR_TOKEN
@@ -593,6 +593,7 @@ atom_query: simple_atom
| COMPLEX_ATOM_QUERY_TOKEN
| HETERONEIGHBOR_ATOM_QUERY_TOKEN
| ALIPHATICHETERONEIGHBOR_ATOM_QUERY_TOKEN
| MIN_RINGSIZE_ATOM_QUERY_TOKEN
| RINGSIZE_ATOM_QUERY_TOKEN
| RINGBOND_ATOM_QUERY_TOKEN
| IMPLICIT_H_ATOM_QUERY_TOKEN
@@ -608,10 +609,14 @@ atom_query: simple_atom
$1->setQuery(makeAtomNumAliphaticHeteroatomNbrsQuery($2));
$$ = $1;
}
| RINGSIZE_ATOM_QUERY_TOKEN number {
| MIN_RINGSIZE_ATOM_QUERY_TOKEN number {
$1->setQuery(makeAtomMinRingSizeQuery($2));
$$ = $1;
}
| RINGSIZE_ATOM_QUERY_TOKEN number {
$1->setQuery(makeAtomInRingOfSizeQuery($2));
$$ = $1;
}
| RINGBOND_ATOM_QUERY_TOKEN number {
$1->setQuery(makeAtomRingBondCountQuery($2));
$$ = $1;
@@ -642,6 +647,35 @@ atom_query: simple_atom
$1->setQuery(nq);
$$ = $1;
}
/* "k" queries have to be handled differently */
| RINGSIZE_ATOM_QUERY_TOKEN RANGE_OPEN_TOKEN MINUS_TOKEN number RANGE_CLOSE_TOKEN {
// Ring-size range with only an upper limit (a minus sign followed by a number).
// The limits are captured by the lambda and handed to queryAtomIsInRingOfSize(),
// so the data function itself takes the requested range into account.
// NOTE(review): lv = -1 looks like a "no lower limit" sentinel - confirm
// against queryAtomIsInRingOfSize().
int lv = -1;
int uv = $4;
// the query compares the value produced by the lambda against uv
ATOM_GREATEREQUAL_QUERY *nq = makeAtomSimpleQuery<ATOM_GREATEREQUAL_QUERY>(uv,[lv,uv](Atom const *at) {
return queryAtomIsInRingOfSize(at, lv, uv);
},std::string("greater_AtomRingSize"));
// install the new query on the atom produced for the token and pass it on
$1->setQuery(nq);
$$ = $1;
}
| RINGSIZE_ATOM_QUERY_TOKEN RANGE_OPEN_TOKEN number MINUS_TOKEN RANGE_CLOSE_TOKEN {
// Ring-size range with only a lower limit (a number followed by a minus sign).
// The limits are captured by the lambda and handed to queryAtomIsInRingOfSize(),
// so the data function itself takes the requested range into account.
// NOTE(review): uv = -1 looks like a "no upper limit" sentinel - confirm
// against queryAtomIsInRingOfSize().
int lv = $3;
int uv = -1;
// the query compares the value produced by the lambda against lv
ATOM_LESSEQUAL_QUERY *nq = makeAtomSimpleQuery<ATOM_LESSEQUAL_QUERY>(lv,[lv,uv](Atom const *at) {
return queryAtomIsInRingOfSize(at, lv, uv);
},std::string("less_AtomRingSize"));
// install the new query on the atom produced for the token and pass it on
$1->setQuery(nq);
$$ = $1;
}
| RINGSIZE_ATOM_QUERY_TOKEN RANGE_OPEN_TOKEN number MINUS_TOKEN number RANGE_CLOSE_TOKEN {
// Ring-size range with both a lower and an upper limit.
// The limits are captured by the lambda and handed to queryAtomIsInRingOfSize(),
// so the data function itself takes the requested range into account.
int lv = $3;
int uv = $5;
// NOTE(review): the two false arguments presumably mark both end points of
// the range as inclusive - confirm against makeAtomRangeQuery().
ATOM_RANGE_QUERY *nq = makeAtomRangeQuery(lv,uv,false,false,[lv,uv](Atom const *at) {
return queryAtomIsInRingOfSize(at, lv, uv);
},std::string("range_AtomRingSize"));
// install the new query on the atom produced for the token and pass it on
$1->setQuery(nq);
$$ = $1;
}
| number H_TOKEN {
QueryAtom *newQ = new QueryAtom();
newQ->setQuery(makeAtomIsotopeQuery($1));
@@ -736,7 +770,7 @@ possible_range_query : COMPLEX_ATOM_QUERY_TOKEN
$1->setQuery(makeAtomNumAliphaticHeteroatomNbrsQuery(0));
$$ = $1;
}
| RINGSIZE_ATOM_QUERY_TOKEN {
| MIN_RINGSIZE_ATOM_QUERY_TOKEN {
$1->setQuery(makeAtomMinRingSizeQuery(5)); // this is going to be ignored anyway
$$ = $1;
}

View File

@@ -83,4 +83,91 @@ TEST_CASE("implicit Hs from SMILES should not make it into SMARTS") {
auto smarts = MolToSmarts(*m);
CHECK(smarts == "[#6]-[#6@H](-[#7])-[#9]");
}
}
}
void checkMatches(const std::string &smarts, const std::string &smiles,
unsigned int nMatches, unsigned int lenFirst,
bool addHs = false) {
// utility function that will find the matches between a smarts and smiles
// if they match the expected values
// smarts : smarts string
// smiles : smiles string
// nMatches : expected number of matches
// lenFirst : length of the first match
//
// Return the list of all matches just in case want to do additional testing
INFO(smarts + " " + smiles);
auto matcher = v2::SmilesParse::MolFromSmarts(smarts);
REQUIRE(matcher);
// we will at the same time test the serialization:
std::string pickle;
MolPickler::pickleMol(*matcher, pickle);
ROMol matcher2(pickle);
auto mol = v2::SmilesParse::MolFromSmiles(smiles);
REQUIRE(mol);
if (addHs) {
MolOps::addHs(*mol);
}
MolOps::findSSSR(*mol);
MatchVectType mV;
auto matches = SubstructMatch(*mol, *matcher, mV);
CHECK(matches);
CHECK(mV.size() == lenFirst);
std::vector<MatchVectType> mVV;
auto uniquify = true;
auto matchCount = SubstructMatch(*mol, *matcher, mVV, uniquify);
CHECK(matchCount == nMatches);
CHECK(mVV[0].size() == lenFirst);
matches = SubstructMatch(*mol, matcher2, mV);
CHECK(matches);
CHECK(mV.size() == lenFirst);
matchCount = SubstructMatch(*mol, matcher2, mVV, true);
CHECK(matchCount == nMatches);
CHECK(mVV[0].size() == lenFirst);
}
TEST_CASE("k SMARTS extensions") {
  SECTION("parsing and writing") {
    // a "k" query has to survive a round trip through the SMARTS writer
    const auto query = "[k4]"_smarts;
    REQUIRE(query);
    const std::string written = MolToSmarts(*query);
    CHECK(written == "[k4]");
  }
  SECTION("matching") {
    // a four-membered ring fused to an eight-membered ring
    const auto mol = "C1CC2N1CCCCCC2"_smiles;
    REQUIRE(mol);
    // checks that the query matches at all and hands back the match count
    const auto countHits = [&mol](const ROMol &query) {
      std::vector<MatchVectType> hits;
      CHECK(SubstructMatch(*mol, query, hits));
      return hits.size();
    };

    // "k" queries are compared against the corresponding "r" queries
    const auto inRing4 = "[k4]"_smarts;
    REQUIRE(inRing4);
    CHECK(countHits(*inRing4) == 4);
    const auto smallest4 = "[r4]"_smarts;
    REQUIRE(smallest4);
    CHECK(countHits(*smallest4) == 4);
    const auto inRing8 = "[k8]"_smarts;
    REQUIRE(inRing8);
    CHECK(countHits(*inRing8) == 8);
    const auto smallest8 = "[r8]"_smarts;
    REQUIRE(smallest8);
    CHECK(countHits(*smallest8) == 6);
  }
  SECTION("ranges") {
    const std::string smiles = "C1CC2N1CCCCCC2C";
    // each entry pairs a SMARTS with the number of matches expected for it
    const std::vector<std::pair<std::string, size_t>> expected = {
        {"[r{4-5}]", 4},  {"[k{4-5}]", 4},  {"[k{4-}]", 10}, {"[r{4-}]", 10},
        {"[k{-5}]", 4},   {"[k{8-}]", 8},   {"[r{8-}]", 6},  {"[r{4-8}]", 10},
        {"[!r{4-5}]", 7}, {"[!k{4-5}]", 7}, {"[!k{4-}]", 1}, {"[!r{4-}]", 1},
        {"[!k{-5}]", 7},  {"[!k{8-}]", 3},  {"[!r{8-}]", 5}, {"[!r{4-8}]", 1},
        {"[k]", 10},      {"[r]", 10},      {"[!k]", 1},     {"[!r]", 1},
    };
    // every match consists of a single atom
    for (const auto &entry : expected) {
      checkMatches(entry.first, smiles, entry.second, 1);
    }
  }
}

View File

@@ -65,6 +65,8 @@ void testPass() {
"C%(1000)CC.C%(1000)", // github #2909
"[C;d2]", // non-hydrogen degree
"C$C", // quadruple bonds
"[k]", // ring size query extensions
"[k4]",
// extended chirality
"C[Fe@TH](O)(Cl)F", "C[Fe@TH1](O)(Cl)F", "C[Fe@SP](O)(Cl)F",
"C[Fe@SP1](O)(Cl)F", "C[Fe@TB](O)(Cl)(Br)F", "C[Fe@TB20](O)(Cl)(Br)F",

View File

@@ -36,3 +36,70 @@ TEST_CASE(
CHECK(q->Match(m->getAtomWithIdx(2)));
}
}
TEST_CASE("range queries for atom ring membership") {
  const auto mol = "C1C2C1CC2C"_smiles;
  REQUIRE(mol);
  // the four atoms that are probed by every check below
  const Atom *atom0 = mol->getAtomWithIdx(0);
  const Atom *atom1 = mol->getAtomWithIdx(1);
  const Atom *atom3 = mol->getAtomWithIdx(3);
  const Atom *atom5 = mol->getAtomWithIdx(5);

  SECTION("as range queries") {
    {
      const std::unique_ptr<ATOM_RANGE_QUERY> sizes3to4(
          makeAtomInRingOfSizeQuery(3, 4));
      REQUIRE(sizes3to4);
      CHECK(sizes3to4->Match(atom0));
      CHECK(sizes3to4->Match(atom1));
      CHECK(sizes3to4->Match(atom3));
      CHECK(!sizes3to4->Match(atom5));
    }
    {
      const std::unique_ptr<ATOM_RANGE_QUERY> sizes4to6(
          makeAtomInRingOfSizeQuery(4, 6));
      REQUIRE(sizes4to6);
      CHECK(!sizes4to6->Match(atom0));
      CHECK(sizes4to6->Match(atom1));
      CHECK(sizes4to6->Match(atom3));
      CHECK(!sizes4to6->Match(atom5));
    }
    {
      // this is how we use the queries in the SMARTS parser:
      // only a lower limit is given
      const auto ringSizeFrom3 = [](Atom const *at) {
        return queryAtomIsInRingOfSize(at, 3, -1);
      };
      const std::unique_ptr<ATOM_LESSEQUAL_QUERY> lowerOnly(
          makeAtomSimpleQuery<ATOM_LESSEQUAL_QUERY>(3, ringSizeFrom3));
      REQUIRE(lowerOnly);
      CHECK(lowerOnly->Match(atom0));
      CHECK(lowerOnly->Match(atom1));
      CHECK(lowerOnly->Match(atom3));
      CHECK(!lowerOnly->Match(atom5));
    }
    {
      // this is how we use the queries in the SMARTS parser:
      // only an upper limit is given
      const auto ringSizeUpTo3 = [](Atom const *at) {
        return queryAtomIsInRingOfSize(at, -1, 3);
      };
      const std::unique_ptr<ATOM_GREATEREQUAL_QUERY> upperOnly(
          makeAtomSimpleQuery<ATOM_GREATEREQUAL_QUERY>(3, ringSizeUpTo3));
      REQUIRE(upperOnly);
      CHECK(upperOnly->Match(atom0));
      CHECK(upperOnly->Match(atom1));
      CHECK(!upperOnly->Match(atom3));
      CHECK(!upperOnly->Match(atom5));
    }
  }
  SECTION("query function") {
    // value expected when only an upper limit is given and nothing fits
    constexpr int nothingFound = std::numeric_limits<int>::max();

    // lower limit only
    CHECK(queryAtomIsInRingOfSize(atom0, 3, -1) == 3);
    CHECK(queryAtomIsInRingOfSize(atom1, 3, -1) == 3);
    CHECK(queryAtomIsInRingOfSize(atom3, 3, -1) == 4);
    CHECK(queryAtomIsInRingOfSize(atom5, 3, -1) == -1);

    // upper limit only
    CHECK(queryAtomIsInRingOfSize(atom0, -1, 3) == 3);
    CHECK(queryAtomIsInRingOfSize(atom1, -1, 3) == 3);
    CHECK(queryAtomIsInRingOfSize(atom3, -1, 3) == nothingFound);
    CHECK(queryAtomIsInRingOfSize(atom5, -1, 3) == nothingFound);

    // both limits
    CHECK(queryAtomIsInRingOfSize(atom0, 3, 4) == 3);
    CHECK(queryAtomIsInRingOfSize(atom1, 3, 4) == 3);
    CHECK(queryAtomIsInRingOfSize(atom3, 3, 4) == 4);
    CHECK(queryAtomIsInRingOfSize(atom5, 3, 4) == -1);

    // a lower limit of zero
    CHECK(queryAtomIsInRingOfSize(atom3, 0, 4) == 4);
    CHECK(queryAtomIsInRingOfSize(atom5, 0, 4) == -1);
  }
}

View File

@@ -455,6 +455,7 @@ h "number of implicit hs" >0 Y
H "total number of Hs" 1
r "size of smallest SSSR ring" >0 Y
R "number of SSSR rings" >0 Y
k "size of SSSR ring" >0 Y extension
v "total valence" 1 Y
x "number of ring bonds" >0 Y
X "total degree" 1 Y