Updated cartridge documentation (#2635)

* Updated cartridge documentation Made examples compatible with latest chembl (25) and most recent conda versions of rdkit (2019.03.4.0, python 3.6.9) + rdkit-postgresql (2019.03.4.0) * updated more query results * updated more query results
2026-06-03 21:44:30 +08:00 · 2019-10-01 16:05:31 +02:00
parent 5dfd67a22a
commit 65a5f6030c
1 changed files with 180 additions and 208 deletions
--- a/Docs/Book/Cartridge.md
+++ b/Docs/Book/Cartridge.md
@@ -65,29 +65,29 @@ Start by downloading and installing the postgresql dump from the ChEMBL website

 Connect to the database, install the cartridge, and create the schema that we'll use:

-    chembl_23=# create extension if not exists rdkit;
-    chembl_23=# create schema rdk;
+    chembl_25=# create extension if not exists rdkit;
+    chembl_25=# create schema rdk;

 Create the molecules and build the substructure search index:

-    chembl_23=# select * into rdk.mols from (select molregno,mol_from_ctab(molfile::cstring) m  from compound_structures) tmp where m is not null;
-    SELECT 1727081
-    chembl_23=# create index molidx on rdk.mols using gist(m);
+    chembl_25=# select * into rdk.mols from (select molregno,mol_from_ctab(molfile::cstring) m  from compound_structures) tmp where m is not null;
+    SELECT 1870451
+    chembl_25=# create index molidx on rdk.mols using gist(m);
    CREATE INDEX
-    chembl_23=# alter table rdk.mols add primary key (molregno);
+    chembl_25=# alter table rdk.mols add primary key (molregno);
    ALTER TABLE

 Create some fingerprints and build the similarity search index:

-    chembl_23=# select molregno,torsionbv_fp(m) as torsionbv,morganbv_fp(m) as mfp2,featmorganbv_fp(m) as ffp2 into rdk.fps from rdk.mols;
-    SELECT 1727081
-    chembl_23=# create index fps_ttbv_idx on rdk.fps using gist(torsionbv);
+    chembl_25=# select molregno,torsionbv_fp(m) as torsionbv,morganbv_fp(m) as mfp2,featmorganbv_fp(m) as ffp2 into rdk.fps from rdk.mols;
+    SELECT 1870451
+    chembl_25=# create index fps_ttbv_idx on rdk.fps using gist(torsionbv);
    CREATE INDEX
-    chembl_23=# create index fps_mfp2_idx on rdk.fps using gist(mfp2);
+    chembl_25=# create index fps_mfp2_idx on rdk.fps using gist(mfp2);
    CREATE INDEX
-    chembl_23=# create index fps_ffp2_idx on rdk.fps using gist(ffp2);
+    chembl_25=# create index fps_ffp2_idx on rdk.fps using gist(ffp2);
    CREATE INDEX
-    chembl_23=# alter table rdk.fps add primary key (molregno);
+    chembl_25=# alter table rdk.fps add primary key (molregno);
    ALTER TABLE

 Here is a group of the commands used here (and below) in one block so that you can just paste it in at the psql prompt:
@@ -103,7 +103,7 @@ Here is a group of the commands used here (and below) in one block so that you c
    create index fps_ffp2_idx on rdk.fps using gist(ffp2);
    alter table rdk.fps add primary key (molregno);
    create or replace function get_mfp2_neighbors(smiles text)
-    returns table(molregno integer, m mol, similarity double precision) as
+    returns table(molregno bigint, m mol, similarity double precision) as
    $$
    select molregno,m,tanimoto_sml(morganbv_fp(mol_from_smiles($1::cstring)),mfp2) as similarity
    from rdk.fps join rdk.mols using (molregno)
@@ -115,52 +115,52 @@ Here is a group of the commands used here (and below) in one block so that you c

 Example query molecules taken from the [eMolecules home page](http://www.emolecules.com/):

-    chembl_23=# select count(*) from rdk.mols where m@>'c1cccc2c1nncc2' ;
+    chembl_25=# select count(*) from rdk.mols where m@>'c1cccc2c1nncc2' ;
     count
    -------
-       447
+       461
    (1 row)

    Time: 107.602 ms
-    chembl_23=# select count(*) from rdk.mols where m@>'c1ccnc2c1nccn2' ;
+    chembl_25=# select count(*) from rdk.mols where m@>'c1ccnc2c1nccn2' ;
     count
    -------
-      1013
+      1124
    (1 row)

    Time: 216.222 ms
-    chembl_23=# select count(*) from rdk.mols where m@>'c1cncc2n1ccn2' ;
+    chembl_25=# select count(*) from rdk.mols where m@>'c1cncc2n1ccn2' ;
     count
    -------
-      1775
+      2233
    (1 row)

    Time: 88.266 ms
-    chembl_23=# select count(*) from rdk.mols where m@>'Nc1ncnc(N)n1' ;
+    chembl_25=# select count(*) from rdk.mols where m@>'Nc1ncnc(N)n1' ;
     count
    -------
-      5842
+      7095
    (1 row)

    Time: 327.855 ms
-    chembl_23=# select count(*) from rdk.mols where m@>'c1scnn1' ;
+    chembl_25=# select count(*) from rdk.mols where m@>'c1scnn1' ;
     count
    -------
-     15962
+     16526
    (1 row)

    Time: 568.675 ms
-    chembl_23=# select count(*) from rdk.mols where m@>'c1cccc2c1ncs2' ;
+    chembl_25=# select count(*) from rdk.mols where m@>'c1cccc2c1ncs2' ;
     count
    -------
-     18986
+     20745
    (1 row)

    Time: 998.104 ms
-    chembl_23=# select count(*) from rdk.mols where m@>'c1cccc2c1CNCCN2' ;
+    chembl_25=# select count(*) from rdk.mols where m@>'c1cccc2c1CNCCN2' ;
     count
    -------
-      1613
+      1788
    (1 row)

    Time: 1922.273 ms
@@ -171,45 +171,42 @@ Given we're searching through 1.7 million compounds these search times aren't in

 One easy way to speed things up, particularly for queries that return a large number of results, is to only retrieve a limited number of results:

-    chembl_23=# select * from rdk.mols where m@>'c1cccc2c1CNCCN2' limit 100;
-     molregno |                                                                                             m                                                             
-
-    ----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------
-    --------------------------------
-       908048 | O=C1CN(C(=O)c2ccc(Br)o2)C(c2ccc(F)cc2)c2cc(F)ccc2N1
-       931972 | Cl.c1ccc(CC2CNc3ccccc3CN2)cc1
-       904450 | CCOC(=O)[C@H]1[C@H]2COc3ccc(Cl)cc3[C@@H]2N2C(=O)c3ccc(Cl)cc3NC(=O)[C@@]12C
-       226391 | C/C=C1/CC2C(OC)Nc3cc(OC)c(OC)cc3C(=O)N2C1
-       930820 | CN1CC(=O)N(CC(=O)Nc2ccc(N(C)C)cc2)c2ccccc2C1=O
-        18576 | CO[C@H]1Nc2c(ccc(C)c2O)C(=O)N2C=C(/C=C/C(N)=O)C[C@@H]12
-       249934 | O=C(c1cccc2ccccc12)N1CCN(Cc2cncn2Cc2ccccc2)c2ccccc2C1
-       ...
-        91020 | CC(C)C[C@H]1C(=O)N2c3ccccc3[C@@](O)(C[C@@H]3NC(=O)c4ccccc4N4C(=O)c5ccccc5NC34)[C@H]2N1C(=O)C(CCCNC(=O)OCc1ccccc1)NC(=O)OC(C)(C)C
-        91225 | CC(C)C[C@H]1C(=O)N2c3ccccc3[C@@](O)(C[C@@H]3NC(=O)c4ccccc4N4C(=O)c5ccccc5NC34)[C@H]2N1C(=O)CCC(=O)[O-].[Na+]
-       348798 | O=C(O)CN1C(=O)C(c2ccc(Cl)cc2)N(C(C(=O)O)c2ccc(Cl)cc2)C(=O)c2cc(I)ccc21
-       348972 | C[C@H](c1ccc(Cl)cc1)N1C(=O)c2cc(I)ccc2N(CCCCC(=O)O)C(=O)[C@@H]1c1ccc(C(F)(F)F)cc1
-
-    ...skipping 23 lines
+    chembl_25=# select * from rdk.mols where m@>'c1cccc2c1CNCCN2' limit 100;
+     molregno |                                                      m                                                       
+    ----------+--------------------------------------------------------------------------------------------------------------
+      1671940 | Cc1cccc(C)c1N1C(=O)c2ccccc2NC(=O)C1C(=O)NCc1ccco1
+      1318078 | COCN1C(=O)[C@@H]2C[C@@H](O)CN2C(=O)c2ccccc21
+      1318783 | O/N=C1/Nc2ccccc2C(=S)N2CSCC12
+      1318127 | CC(=O)O[C@H]1C[C@H]2C(=S)Nc3ccccc3C(=S)N2C1
+      1308578 | O=C1Nc2cc([N+](=O)[O-])ccc2C(=O)N2CCC[C@@H]12
+      1417168 | O=C(NCC(F)(F)F)C1C(=O)Nc2ccccc2C(=O)N1Cc1ccccc1
+      ...
+       793329 | Cc1ccc2c(c1)C(c1ccccc1)N(C(=O)c1ccc(OC(C)C)cc1)CC(=O)N2
+       921215 | O=C1CN(C(=O)c2cc([N+](=O)[O-])ccc2Cl)C(c2ccc(F)cc2)c2cc(F)ccc2N1
+       790949 | CCOC(=O)[C@H]1[C@H]2COc3ccc(Cl)cc3[C@@H]2N2C(=O)c3cc(C)ccc3NC(=O)[C@@]12C
+       760998 | CC(=O)N1CC(=O)Nc2ccc(Cl)cc2C1c1ccc(F)cc1
+    (100 rows)
+    
    Time: 97.357 ms

 #### SMARTS-based queries

 Oxadiazole or thiadiazole:

-    chembl_23=# select * from rdk.mols where m@>'c1[o,s]ncn1'::qmol limit 500;
-     molregno |                                                      m                                                       
-    ----------+--------------------------------------------------------------------------------------------------------------
-      1370170 | Fc1cccc(-c2nc(NCC3COc4ccccc4O3)no2)c1F
-      1370417 | COc1cc(CN2CCC(Cc3nc(-c4ccc5c(c4)CCO5)no3)C2)ccc1F
-      1370526 | Cl.Cn1cc(-c2noc(/C=C3/CCN4CCCC[C@@H]4C3)n2)c2ccccc21
-      1379267 | CCC(c1ccccc1)c1noc(CCN(CC)CC)n1
-      1404150 | OC[C@H]1O[C@H](c2nc(-c3nc(-c4cccs4)no3)cs2)C[C@@H]1O
-      1217463 | CC(C)(C)c1ccc(-c2noc(CCC(=O)N3CCCCC3)n2)cc1
+    chembl_25=# select * from rdk.mols where m@>'c1[o,s]ncn1'::qmol limit 500;
+     molregno |                                                 m
+    ----------+---------------------------------------------------------------------------------------------------
+      1882516 | COc1cccc(CN(C)Cc2nc(C(C)C)no2)c1
+      2194441 | Cc1nc([C@](C)(O)C#Cc2ccc3c(c2)-c2nc(C(N)=O)sc2[C@@H](F)CO3)no1
+      1881742 | CCOc1ccc(C(F)(F)F)cc1NC(=O)NCc1noc(C)n1
+      1949861 | FC(F)(F)c1ccc(-c2nc(-c3ccc4nc[nH]c4c3)no2)cc1
+      1949860 | FC(F)(F)c1cccc(-c2nc(-c3ccc4nc[nH]c4c3)no2)c1
+      2172627 | O=c1[nH]cc(-c2cc(Cl)ccc2Oc2cc(F)c(S(=O)(=O)Nc3ncns3)cc2F)n2cncc12
      ...
-      1517753 | CC(C)c1noc(N2CCC(CO[C@H]3CC[C@H](c4ccc(S(C)(=O)=O)cc4F)CC3)CC2)n1
-      1263024 | COc1cc(Nc2nc3c(s2)CCCC3c2ccccc2)ccc1-c1nc(C)no1
-      1264016 | O=C(O)CCc1nc2cc(-c3noc(-c4cc(C(F)(F)F)cc(C(F)(F)F)c4)n3)ccc2[nH]1
-      1847733 | Cc1cc(-c2noc([C@H]3CCCCN3C(=O)COc3ccccc3)n2)no1
+      1848026 | O=C1CCCN1c1cccc(-c2noc([C@H]3CCCCN3C(=O)COc3ccccc3)n2)c1
+      1848027 | O=C1CN(c2cccc(-c3noc([C@H]4CCCCN4C(=O)COc4ccccc4)n3)c2)C(=O)N1
+      1848036 | CN(C)C(=O)CCC(=O)Nc1cc(F)cc(-c2noc([C@H]3CCCCN3C(=O)COc3ccccc3)n2)c1
+      1852688 | CC(Sc1nc(N)cc(N)n1)c1nc(C(C)(C)C)no1
    (500 rows)

    Time: 761.847 ms
@@ -220,64 +217,39 @@ This is slower than the pure SMILES query, this is generally true of SMARTS-base

 Note that by default stereochemistry is not taken into account when doing substructure queries:

-    chembl_23=# select * from rdk.mols where m@>'NC(=O)[C@@H]1CCCN1C=O' limit 10;
-     molregno |                                                                                                                                                           
-                   m                                                                                                                                                      
-
-    ----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------
-    ----------------------------------------------------------------------------------------------------------------------------------------------------------------------
-    ---------------------
-        87611 | CNCC(=O)N[C@@H](CCCN=C(N)N)C(=O)N1C[C@H]2C[C@H]1C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@H](C(=O)N[C@@H](Cc1c[nH]cn1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O
-    )O)CCSS2
-        88372 | CNCCCC[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C@@H](CCCCNC)NC(=O)[C@H](Cc1ccc(O)cc1)NC(=O)[C@H](CO)NC(=O)[C@@H](Cc1ccccc1)NC(=O)[C@@H](Cc1ccccc1)NC(=O)[C@@H](C
-    c1ccc2ccccc2c1)NC(C)=O)C(=O)N1CCC[C@@H]1C(=O)N[C@H](C)C(=O)O
-        88322 | CC(=O)N[C@H](Cc1ccc2ccccc2c1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@@H](CO)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@H](CCCCNC(C)C)C(=O)N[C@@H](Cc1
-    ccccc1)C(=O)N[C@@H](CCCCNC(C)C)C(=O)N1CCC[C@@H]1C(=O)N[C@H](C)C(=O)O
-        88168 | CC(=O)N[C@H](Cc1ccc2ccccc2c1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@@H](CO)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@H](CCCN=C(N)N)C(=O)N[C@@H](Cc1
-    ccccc1)C(=O)N[C@@H](CCCCNC1CCCC1)C(=O)N1CCC[C@@H]1C(=O)N[C@H](C)C(=O)O
-        88150 | CC(=O)N[C@H](Cc1ccc2ccccc2c1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@@H](CO)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@H](CCCN=C(N)N)C(=O)N[C@@H](Cc1
-    ccccc1)C(=O)N[C@@H](CCCCNCc1ccc(C)cc1)C(=O)N1CCC[C@@H]1C(=O)N[C@H](C)C(=O)O
-        88373 | CC(=O)N[C@H](Cc1ccc2ccccc2c1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@@H](CO)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@H](CCCCNC1CCCCC1)C(=O)N[C@@H](
-    Cc1ccccc1)C(=O)N[C@@H](CCCCNC1CCCCC1)C(=O)N1CCC[C@@H]1C(=O)N[C@H](C)C(=O)O
-        93377 | CC(=O)N[C@@H](Cc1ccc([N+](=O)[O-])cc1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CCC/N=C(/N)NS(=O)(=O)c1c(C)c(C)c2c(c1C)CCC(C)(C)O2)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](
-    CCC/N=C(/N)NS(=O)(=O)c1c(C)c(C)c2c(c1C)CCC(C)(C)O2)C(=O)N[C@H](C(=O)NCC(=O)N[C@@H](COC(C)(C)C)C(=O)N[C@@H](CCCCNC(=O)c1ccccc1N)C(=O)NCC(=O)O)[C@@H](C)OC(C)(C)C
-        94493 | CC(C)C[C@@H]1NC(=O)[C@H]([C@@H](C)O)NC(=O)[C@H](Cc2c[nH]c3ccccc23)NC(=O)[C@H](C(C)C)NC(=O)[C@H](NC(=O)[C@H](CCCCN)NC(=O)[C@@H]2CCCN2C(=O)[C@H](CCC(N)=O)NC
-    (=O)CNC(=O)CN)CSSC[C@@H](C(=O)N[C@@H](Cc2ccc(O)cc2)C(=O)N[C@@H](CO)C(=O)N[C@H](C(=O)NCC(=O)NCC(N)=O)[C@@H](C)O)NC(=O)[C@H](Cc2c[nH]cn2)NC(=O)[C@H](Cc2ccccc2)NC(=O)CNC
-    (=O)[C@@H]2CCCN2C1=O
-
-    ...skipping 1 line
-        89559 | CC1(C)SSC(C)(C)[C@@H](C(=O)N[C@@H](Cc2c[nH]cn2)C(=O)N2CCC[C@H]2C(=O)N[C@@H](Cc2ccccc2)C(=O)O)NC(=O)[C@H](Cc2ccc(O)cc2)NC(=O)[C@H]1NC(=O)[C@H](CCCN=C(N)N)N
-    C(=O)[C@@H](N)CC(=O)O
+    chembl_25=# select * from rdk.mols where m@>'NC(=O)[C@@H]1CCCN1C=O' limit 10;
+     molregno |                                                 m
+    ----------+---------------------------------------------------------------------------------------------------
+      2213985 | CC[C@H](C)[C@@H]1NC(=O)[C@@H]2CCCN2C(=O)[C@@H]2CCCN2C(=O)[C@H]([C@@H](C)CC)NC(=O)[C@H](CO)NC(=O)[C@H](C)NC(=O)[C@H]([C@H](C)O)NC(=O)[C@@H]2CSSC[C@H](NC1=O)C(=O)N[C@@H](Cc1cnc[nH]1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](Cc1c[nH]c3ccccc13)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N2
+      1956682 | NC(=O)[C@@H]1CCCN1C(=O)[C@H](Cc1nc(I)[nH]c1I)NC(=O)c1cnccn1
+      2212188 | CN1C(=O)[C@H](CCCNC(=N)N)NC(=O)[C@@H](Cc2ccc(O)cc2)NC(=O)[C@@H]2CCCN2C(=O)[C@H](Cc2ccc3ccccc3c2)NC(=O)[C@@H]1CC(=O)O
+      2053463 | NCCCC[C@H](NC(=O)[C@H](Cc1ccc(OP(=O)(O)O)cc1)NC(=O)Cc1ccccc1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(N)=O
+      2060743 | CCCCCCCCCCCCCCCCNC(=O)CN(CC(=O)NC(C)(C)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CC(C)C)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](CC(N)=O)C(N)=O)C(=O)c1cccnc1
+      2060744 | CCCCCCCCCCCCCCCCN(CCCCCCCCCCCCCCCC)CCCCCC(=O)NC(C)(C)C(=O)NC(Cc1ccccc1)C(=O)NC(CC(C)C)C(=O)NC(Cc1ccccc1)C(=O)NC(CCCNC(=N)N)C(=O)N1CCCC1C(=O)NC(CCCNC(=N)N)C(=O)NC(CC(N)=O)C(N)=O
+      2077784 | CC[C@H](C)[C@@H]1NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](CC(C)C)NC(=O)[C@@H]2CCCN2C(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](C(C)C)NC(=O)[C@H](CC(C)C)NC(=O)[C@H](CCSC)NC1=O
+      2077779 | CC[C@H](C)[C@@H]1NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](CC(C)C)NC(=O)[C@@H]2CCCN2C(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](C(C)C)NC(=O)[C@H](CC(C)C)NC(=O)[C@H](CC[S+](C)[O-])NC1=O
+      2077782 | CC[C@H](C)[C@@H]1NC(=O)[C@H](Cc2c[nH]c3ccccc23)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@@H]2CCCN2C(=O)[C@H](CCSC)NC(=O)[C@H](CC(C)C)NC(=O)[C@H](CC[S+](C)[O-])NC1=O
+      2077780 | CC(C)C[C@@H]1NC(=O)[C@H](CC[S+](C)[O-])NC(=O)[C@H](C(C)C)NC(=O)[C@H](Cc2c[nH]c3ccccc23)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@@H]2CCCN2C(=O)[C@H](CC[S+](C)[O-])NC1=O
    (10 rows)
-
+    
 This can be changed using the rdkit.do\_chiral\_sss configuration variable:

-    chembl_23=# set rdkit.do_chiral_sss=true;
+    chembl_25=# set rdkit.do_chiral_sss=true;
    SET
    Time: 0.241 ms
-    chembl_23=# select * from rdk.mols where m@>'NC(=O)[C@@H]1CCCN1C=O' limit 10;
-     molregno |                                                                                                                                                              
-                m                                                                                                                                                            
-
-    ----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------
-    -------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-    ---------------
-        87611 | CNCC(=O)N[C@@H](CCCN=C(N)N)C(=O)N1C[C@H]2C[C@H]1C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@H](C(=O)N[C@@H](Cc1c[nH]cn1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O)O)
-    CCSS2
-        93377 | CC(=O)N[C@@H](Cc1ccc([N+](=O)[O-])cc1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CCC/N=C(/N)NS(=O)(=O)c1c(C)c(C)c2c(c1C)CCC(C)(C)O2)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC
-    /N=C(/N)NS(=O)(=O)c1c(C)c(C)c2c(c1C)CCC(C)(C)O2)C(=O)N[C@H](C(=O)NCC(=O)N[C@@H](COC(C)(C)C)C(=O)N[C@@H](CCCCNC(=O)c1ccccc1N)C(=O)NCC(=O)O)[C@@H](C)OC(C)(C)C
-        94493 | CC(C)C[C@@H]1NC(=O)[C@H]([C@@H](C)O)NC(=O)[C@H](Cc2c[nH]c3ccccc23)NC(=O)[C@H](C(C)C)NC(=O)[C@H](NC(=O)[C@H](CCCCN)NC(=O)[C@@H]2CCCN2C(=O)[C@H](CCC(N)=O)NC(=O
-    )CNC(=O)CN)CSSC[C@@H](C(=O)N[C@@H](Cc2ccc(O)cc2)C(=O)N[C@@H](CO)C(=O)N[C@H](C(=O)NCC(=O)NCC(N)=O)[C@@H](C)O)NC(=O)[C@H](Cc2c[nH]cn2)NC(=O)[C@H](Cc2ccccc2)NC(=O)CNC(=O)[C
-    @@H]2CCCN2C1=O
-        89558 | NC(N)=NCCC[C@H](NC(=O)[C@@H](N)CC(=O)O)C(=O)N[C@H]1CCSSC[C@@H](C(=O)N[C@@H](Cc2c[nH]cn2)C(=O)N2CCC[C@H]2C(=O)N[C@@H](Cc2ccccc2)C(=O)O)NC(=O)[C@H](Cc2ccc(O)cc
-    2)NC1=O
-        89559 | CC1(C)SSC(C)(C)[C@@H](C(=O)N[C@@H](Cc2c[nH]cn2)C(=O)N2CCC[C@H]2C(=O)N[C@@H](Cc2ccccc2)C(=O)O)NC(=O)[C@H](Cc2ccc(O)cc2)NC(=O)[C@H]1NC(=O)[C@H](CCCN=C(N)N)NC(=
-    O)[C@@H](N)CC(=O)O
-       126618 | NC(=O)[C@@H]1CCCN1C(=O)[C@@H]1CCCN1C(=O)[C@@H](O)[C@H](N)Cc1ccccc1
-       152339 | O=C(O)CN[C@H](CC1CCCCC1)C(=O)N1CCC[C@H]1C(=O)NCCCc1c[nH]cn1
-       152504 | N[C@H](CC1CCCCC1)C(=O)N1[C@H](C(=O)NC/C=C/c2c[nH]cn2)C[C@@H]2CCCC[C@@H]21
-       152383 | N[C@H](CC1CCCCC1)C(=O)N1CCC[C@H]1C(=O)NCCCCc1c[nH]cn1
-       151837 | N[C@H](CC1CCCCC1)C(=O)N1CCC[C@H]1C(=O)NC/C=C/c1c[nH]cn1
+    chembl_25=# select * from rdk.mols where m@>'NC(=O)[C@@H]1CCCN1C=O' limit 10;
+     molregno |                                                 m
+    ----------+---------------------------------------------------------------------------------------------------
+      2213985 | CC[C@H](C)[C@@H]1NC(=O)[C@@H]2CCCN2C(=O)[C@@H]2CCCN2C(=O)[C@H]([C@@H](C)CC)NC(=O)[C@H](CO)NC(=O)[C@H](C)NC(=O)[C@H]([C@H](C)O)NC(=O)[C@@H]2CSSC[C@H](NC1=O)C(=O)N[C@@H](Cc1cnc[nH]1)C(=O)N[C@H](Cc1ccccc1)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](Cc1c[nH]c3ccccc13)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N2
+      1956682 | NC(=O)[C@@H]1CCCN1C(=O)[C@H](Cc1nc(I)[nH]c1I)NC(=O)c1cnccn1
+      2212188 | CN1C(=O)[C@H](CCCNC(=N)N)NC(=O)[C@@H](Cc2ccc(O)cc2)NC(=O)[C@@H]2CCCN2C(=O)[C@H](Cc2ccc3ccccc3c2)NC(=O)[C@@H]1CC(=O)O
+      2053463 | NCCCC[C@H](NC(=O)[C@H](Cc1ccc(OP(=O)(O)O)cc1)NC(=O)Cc1ccccc1)C(=O)N1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(N)=O
+      2060743 | CCCCCCCCCCCCCCCCNC(=O)CN(CC(=O)NC(C)(C)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CC(C)C)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCCNC(=N)N)C(=O)N1CCC[C@H]1C(=O)N[C@@H](CCCNC(=N)N)C(=O)N[C@@H](CC(N)=O)C(N)=O)C(=O)c1cccnc1
+      2077784 | CC[C@H](C)[C@@H]1NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](CC(C)C)NC(=O)[C@@H]2CCCN2C(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](C(C)C)NC(=O)[C@H](CC(C)C)NC(=O)[C@H](CCSC)NC1=O
+      2077779 | CC[C@H](C)[C@@H]1NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](CC(C)C)NC(=O)[C@@H]2CCCN2C(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](C(C)C)NC(=O)[C@H](CC(C)C)NC(=O)[C@H](CC[S+](C)[O-])NC1=O
+      2077782 | CC[C@H](C)[C@@H]1NC(=O)[C@H](Cc2c[nH]c3ccccc23)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@@H]2CCCN2C(=O)[C@H](CCSC)NC(=O)[C@H](CC(C)C)NC(=O)[C@H](CC[S+](C)[O-])NC1=O
+      2077780 | CC(C)C[C@@H]1NC(=O)[C@H](CC[S+](C)[O-])NC(=O)[C@H](C(C)C)NC(=O)[C@H](Cc2c[nH]c3ccccc23)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@H](Cc2ccccc2)NC(=O)[C@@H]2CCCN2C(=O)[C@H](CC[S+](C)[O-])NC1=O
+      2211488 | CC[C@H](C)[C@H](N)C(=O)N[C@H](C(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N1CCC[C@H]1C(=O)N1CCC[C@H]1C(=O)N[C@H](CCC(=O)N[C@@H](CCC(=O)N[C@@H](CC(C)C)C(=O)O)Cc1ccccc1)Cc1ccccc1)C(C)C)[C@@H](C)CC
    (10 rows)

    Time: 6.181 ms
@@ -289,19 +261,19 @@ having to construct complex SMARTS queries. The cartridge function `mol_adjust_q
 can be used to do just this. Here is an example of the default behavior, using a  
 query for 2,6 di-substituted pyridines:

-    chembl_23=# select molregno,m from rdk.mols where m@>mol_adjust_query_properties('*c1cccc(NC(=O)*)n1') limit 10;
-     molregno |                                             m                                             
-    ----------+-------------------------------------------------------------------------------------------
-      1993749 | Cn1c(Nc2c(Cl)ccc(CNC(=O)C(C)(C)C)c2Cl)nc2cc(C(=O)Nc3cccc(C(F)(F)F)n3)c(N3CCC(F)(F)C3)cc21
-      1988455 | Cc1cccc(C(=O)Nc2cccc(Oc3cccnc3)n2)c1
-      1870095 | COC(=O)CN(C(=O)C(C)c1c(F)cccc1F)c1cccc(C)n1
-      1870023 | CCC(C)C(=O)N(CC(=O)OC)c1cccc(C)n1
-      1873944 | Cc1ccc(C(=O)N(C)CC(=O)Nc2cccc(C)n2)cn1
-      1873968 | Cc1cccc(NC(=O)CN(C)C(=O)c2ccc(-n3cccc3)nc2)n1
-      1882693 | Cc1cccc(NC(=O)CCNCc2c(C)nn(C)c2N(C)C)n1
-      1882711 | COc1c(CNCCC(=O)Nc2cccc(C)n2)c(C)nn1C
-      1868705 | CCOc1cccc(NC(=O)c2cnc(C)cn2)n1
-      1875177 | Cc1cccc(NC(=O)[C@@H]2CCCN2Cc2nc(C)c(C)o2)n1
+    chembl_25=# select molregno,m from rdk.mols where m@>mol_adjust_query_properties('*c1cccc(NC(=O)*)n1') limit 10;
+     molregno |                                                 m
+    ----------+---------------------------------------------------------------------------------------------------
+      1609520 | Cc1cccc(NC(=O)c2cc(Br)ccc2C(=O)O)n1
+      1141456 | CCN(CC)CCCn1cc(NC(=O)Nc2cccc(-c3ccccc3)n2)c2ccccc21
+      1431198 | Cc1cccc(NC(=O)c2nc(C)sc2Nc2cccnc2)n1
+       734975 | Cc1cccc(NC(=O)CN(C)S(=O)(=O)c2ccc(Cl)cc2)n1
+       760426 | Cc1cccc(NC(=O)CCCn2cc([N+](=O)[O-])cn2)n1
+       782786 | Cc1cccc(NC(=O)CN2C(=O)NC(C)(c3ccc4ccccc4c3)C2=O)n1
+      1478990 | Cc1cccc(NC(=O)Cn2c(=O)sc3cc(C(=O)c4ccccc4)ccc32)n1
+      1478787 | Cc1cccc(NC(=O)Cn2c(=O)sc3cc(C(=O)c4ccccc4F)ccc32)n1
+      1955608 | C[C@H](N)C(=O)Nc1cccc(N)n1
+       773911 | Cc1cccc(NC(=O)c2c(-c3ccccc3)noc2C)n1
    (10 rows)

    Time: 11.895 ms
@@ -315,20 +287,20 @@ By default `mol_adjust_query_properties()` makes the following changes to the mo
 We can control the behavior by providing an additional JSON argument. Here's an example
 where we disable the additional degree queries:

-    chembl_23=# select molregno,m from rdk.mols where m@>mol_adjust_query_properties('*c1cccc(NC(=O)*)n1',
-    chembl_23(# '{"adjustDegree":false}') limit 10;
-     molregno |                                             m                                             
-    ----------+-------------------------------------------------------------------------------------------
-      1993749 | Cn1c(Nc2c(Cl)ccc(CNC(=O)C(C)(C)C)c2Cl)nc2cc(C(=O)Nc3cccc(C(F)(F)F)n3)c(N3CCC(F)(F)C3)cc21
-      1957849 | COc1ccc2ncc(F)c(C[C@H](O)C3CCC(NCc4nc5c(cc4F)OCC(=O)N5)CO3)c2n1
-      1959611 | O=C1COc2ccc(CNC3CCN(CCn4c(=O)ccc5ncc(OCc6cccnn6)cc54)CC3)nc2N1
-      1988455 | Cc1cccc(C(=O)Nc2cccc(Oc3cccnc3)n2)c1
-      1870095 | COC(=O)CN(C(=O)C(C)c1c(F)cccc1F)c1cccc(C)n1
-      1870023 | CCC(C)C(=O)N(CC(=O)OC)c1cccc(C)n1
-      1873944 | Cc1ccc(C(=O)N(C)CC(=O)Nc2cccc(C)n2)cn1
-      1873968 | Cc1cccc(NC(=O)CN(C)C(=O)c2ccc(-n3cccc3)nc2)n1
-      1882693 | Cc1cccc(NC(=O)CCNCc2c(C)nn(C)c2N(C)C)n1
-      1882711 | COc1c(CNCCC(=O)Nc2cccc(C)n2)c(C)nn1C
+    chembl_25=# select molregno,m from rdk.mols where m@>mol_adjust_query_properties('*c1cccc(NC(=O)*)n1',
+    chembl_25(# '{"adjustDegree":false}') limit 10;
+     molregno |                                                 m
+    ----------+---------------------------------------------------------------------------------------------------
+      2146308 | CCn1ncc2cc3nc(c21)NCCOC[C@H](c1ccccc1)NC(=O)N3
+      2137309 | CCn1ncc2cc3nc(c21)CCCO[C@@H](O)[C@H](c1ccccc1)NC(=O)N3
+      2102593 | CCn1ncc2cc3nc(c21)CCCO[C@@H]([C@@H](C)O)[C@@H](c1ccccc1)NC(=O)N3
+      2171613 | CCn1ncc2cc3nc(c21)CCCO[C@@H]([C@H](C)O)[C@@H](c1ccccc1)NC(=O)N3
+      2111904 | CCn1ncc2cc3nc(c21)C[C@H](O)COC[C@H](c1cccc(Cl)c1)NC(=O)N3
+      2173410 | CCn1ncc2cc3nc(c21)CCCOC[C@H](c1ccccc1)NC(=O)N3
+      2189450 | Cn1ncc2cc3nc(c21)CCCOC[C@H](c1ccccc1)NC(=O)N3
+      2195752 | CCn1ncc2cc3nc(c21)C[C@H](O)COC[C@H](c1ccccc1)NC(=O)N3
+      1609520 | Cc1cccc(NC(=O)c2cc(Br)ccc2C(=O)O)n1
+      1141456 | CCN(CC)CCCn1cc(NC(=O)Nc2cccc(-c3ccccc3)n2)c2ccccc21
    (10 rows)

    Time: 10.780 ms
@@ -336,20 +308,20 @@ where we disable the additional degree queries:
 or where we don't add the additional degree queries to ring atoms or dummies (they are only
 added to chain atoms):

-    chembl_23=# select molregno,m from rdk.mols where m@>mol_adjust_query_properties('*c1cccc(NC(=O)*)n1',
-    chembl_23(# '{"adjustDegree":true,"adjustDegreeFlags":"IGNORERINGS|IGNOREDUMMIES"}') limit 10;
-     molregno |                                             m                                             
-    ----------+-------------------------------------------------------------------------------------------
-      1993749 | Cn1c(Nc2c(Cl)ccc(CNC(=O)C(C)(C)C)c2Cl)nc2cc(C(=O)Nc3cccc(C(F)(F)F)n3)c(N3CCC(F)(F)C3)cc21
-      1957849 | COc1ccc2ncc(F)c(C[C@H](O)C3CCC(NCc4nc5c(cc4F)OCC(=O)N5)CO3)c2n1
-      1959611 | O=C1COc2ccc(CNC3CCN(CCn4c(=O)ccc5ncc(OCc6cccnn6)cc54)CC3)nc2N1
-      1988455 | Cc1cccc(C(=O)Nc2cccc(Oc3cccnc3)n2)c1
-      1873944 | Cc1ccc(C(=O)N(C)CC(=O)Nc2cccc(C)n2)cn1
-      1873968 | Cc1cccc(NC(=O)CN(C)C(=O)c2ccc(-n3cccc3)nc2)n1
-      1882693 | Cc1cccc(NC(=O)CCNCc2c(C)nn(C)c2N(C)C)n1
-      1882711 | COc1c(CNCCC(=O)Nc2cccc(C)n2)c(C)nn1C
-      1884388 | Cc1noc(COCC(=O)Nc2ccc(Br)c(C)n2)n1
-      1868705 | CCOc1cccc(NC(=O)c2cnc(C)cn2)n1
+    chembl_25=# select molregno,m from rdk.mols where m@>mol_adjust_query_properties('*c1cccc(NC(=O)*)n1',
+    chembl_25(# '{"adjustDegree":true,"adjustDegreeFlags":"IGNORERINGS|IGNOREDUMMIES"}') limit 10;
+     molregno |                                                 m
+    ----------+---------------------------------------------------------------------------------------------------
+      2146308 | CCn1ncc2cc3nc(c21)NCCOC[C@H](c1ccccc1)NC(=O)N3
+      2137309 | CCn1ncc2cc3nc(c21)CCCO[C@@H](O)[C@H](c1ccccc1)NC(=O)N3
+      2102593 | CCn1ncc2cc3nc(c21)CCCO[C@@H]([C@@H](C)O)[C@@H](c1ccccc1)NC(=O)N3
+      2171613 | CCn1ncc2cc3nc(c21)CCCO[C@@H]([C@H](C)O)[C@@H](c1ccccc1)NC(=O)N3
+      2111904 | CCn1ncc2cc3nc(c21)C[C@H](O)COC[C@H](c1cccc(Cl)c1)NC(=O)N3
+      2173410 | CCn1ncc2cc3nc(c21)CCCOC[C@H](c1ccccc1)NC(=O)N3
+      2189450 | Cn1ncc2cc3nc(c21)CCCOC[C@H](c1ccccc1)NC(=O)N3
+      2195752 | CCn1ncc2cc3nc(c21)C[C@H](O)COC[C@H](c1ccccc1)NC(=O)N3
+      1609520 | Cc1cccc(NC(=O)c2cc(Br)ccc2C(=O)O)n1
+      1141456 | CCN(CC)CCCn1cc(NC(=O)Nc2cccc(-c3ccccc3)n2)c2ccccc21
    (10 rows)

    Time: 12.827 ms
@@ -381,7 +353,7 @@ are constructed by combining operations from the list below with the `|` charact

 Basic similarity searching:

-    chembl_23=# select count(*) from rdk.fps where mfp2%morganbv_fp('Cc1ccc2nc(-c3ccc(NC(C4N(C(c5cccs5)=O)CCC4)=O)cc3)sc2c1');
+    chembl_25=# select count(*) from rdk.fps where mfp2%morganbv_fp('Cc1ccc2nc(-c3ccc(NC(C4N(C(c5cccs5)=O)CCC4)=O)cc3)sc2c1');
     count
    -------
        67
@@ -391,8 +363,8 @@ Basic similarity searching:

 Usually we'd like to find a sorted list of neighbors along with the accompanying SMILES. This SQL function makes that pattern easy:

-    chembl_23=# create or replace function get_mfp2_neighbors(smiles text)
-        returns table(molregno integer, m mol, similarity double precision) as
+    chembl_25=# create or replace function get_mfp2_neighbors(smiles text)
+        returns table(molregno bigint, m mol, similarity double precision) as
      $$
      select molregno,m,tanimoto_sml(morganbv_fp(mol_from_smiles($1::cstring)),mfp2) as similarity
      from rdk.fps join rdk.mols using (molregno)
@@ -401,35 +373,35 @@ Usually we'd like to find a sorted listed of neighbors along with the accompanyi
      $$ language sql stable ;
    CREATE FUNCTION
    Time: 0.856 ms
-    chembl_23=# select * from get_mfp2_neighbors('Cc1ccc2nc(-c3ccc(NC(C4N(C(c5cccs5)=O)CCC4)=O)cc3)sc2c1') limit 10;
-     molregno |                             m                              |    similarity     
-    ----------+------------------------------------------------------------+-------------------
-       471319 | Cc1ccc2nc(-c3ccc(NC(=O)C4CCN(S(=O)(=O)c5cccs5)C4)cc3)sc2c1 | 0.638888888888889
-      1032469 | O=C(Nc1nc2ccc(Cl)cc2s1)[C@@H]1CCCN1C(=O)c1cccs1            | 0.623188405797101
-       751668 | COc1ccc2nc(NC(=O)[C@@H]3CCCN3C(=O)c3cccs3)sc2c1            | 0.619718309859155
-       471318 | Cc1ccc2nc(-c3ccc(NC(=O)C4CN(S(=O)(=O)c5cccs5)C4)cc3)sc2c1  | 0.611111111111111
-       740754 | Cc1ccc(NC(=O)C2CCCN2C(=O)c2cccs2)cc1C                      | 0.606060606060606
-       732905 | O=C(Nc1ccc(S(=O)(=O)N2CCCC2)cc1)C1CCCN1C(=O)c1cccs1        | 0.602941176470588
-      1087495 | Cc1ccc(NC(=O)C2CCCN2C(=O)c2cccs2)c(C)c1                    | 0.597014925373134
-       471462 | CCS(=O)(=O)N1CCC(C(=O)Nc2ccc(-c3nc4ccc(C)cc4s3)cc2)CC1     | 0.585714285714286
-       810850 | Cc1cc(C)n(-c2ccc(NC(=O)C3CCCCN3C(=O)c3cccs3)cc2)n1         | 0.583333333333333
-      1224407 | O=C(Nc1cccc(S(=O)(=O)N2CCCC2)c1)C1CCCN1C(=O)c1cccs1        | 0.579710144927536
+    chembl_25=# select * from get_mfp2_neighbors('Cc1ccc2nc(-c3ccc(NC(C4N(C(c5cccs5)=O)CCC4)=O)cc3)sc2c1') limit 10;
+     molregno |                                m                                 |    similarity
+    ----------+------------------------------------------------------------------+-------------------
+       751668 | COc1ccc2nc(NC(=O)[C@@H]3CCCN3C(=O)c3cccs3)sc2c1                  | 0.619718309859155
+       740754 | Cc1ccc(NC(=O)C2CCCN2C(=O)c2cccs2)cc1C                            | 0.606060606060606
+       732905 | O=C(Nc1ccc(S(=O)(=O)N2CCCC2)cc1)C1CCCN1C(=O)c1cccs1              | 0.602941176470588
+       810850 | Cc1cc(C)n(-c2ccc(NC(=O)C3CCCCN3C(=O)c3cccs3)cc2)n1               | 0.583333333333333
+      1224407 | O=C(Nc1cccc(S(=O)(=O)N2CCCC2)c1)C1CCCN1C(=O)c1cccs1              | 0.579710144927536
+       779258 | CC1CCN(S(=O)(=O)c2ccc(NC(=O)[C@@H]3CCCN3C(=O)c3cccs3)cc2)CC1     | 0.569444444444444
+       472441 | Cc1ccc2nc(-c3ccc(NC(=O)C4CCN(S(=O)(=O)C(C)C)CC4)cc3)sc2c1        | 0.569444444444444
+       745651 | Cc1ccc(NC(=O)[C@@H]2CCCN2C(=O)c2cccs2)cc1S(=O)(=O)N1CCCCC1       | 0.567567567567568
+       472510 | Cc1ccc2nc(-c3ccc(NC(=O)C4CCN(S(=O)(=O)c5cccc(Cl)c5)CC4)cc3)sc2c1 | 0.565789473684211
+      1233426 | Cc1cccc2sc(NC(=O)[C@@H]3CCCN3C(=O)c3cccs3)nc12                   | 0.563380281690141
    (10 rows)

    Time: 28.909 ms
-    chembl_23=# select * from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1') limit 10;
-     molregno |                           m                           |    similarity     
-    ----------+-------------------------------------------------------+-------------------
-      1044892 | Cc1ccc2nc(N(CCN(C)C)C(=O)c3cc(Cl)sc3Cl)sc2c1          | 0.518518518518518
-      1040496 | Cc1ccc2nc(N(CCCN(C)C)C(=O)CCc3ccccc3)sc2c1            | 0.517857142857143
-      1049393 | Cc1ccc2nc(N(CCCN(C)C)C(=O)CS(=O)(=O)c3ccccc3)sc2c1    | 0.517857142857143
-       441378 | Cc1ccc2nc(NC(=O)CCC(=O)O)sc2c1                        | 0.510204081632653
-      1047691 | Cc1ccc(S(=O)(=O)CC(=O)N(CCCN(C)C)c2nc3ccc(C)cc3s2)cc1 | 0.509090909090909
-       911501 | Cc1ccc2nc(N(CCN(C)C)C(=O)c3cc(Cl)sc3Cl)sc2c1.Cl       | 0.509090909090909
-      1042958 | Cc1ccc2nc(N(CCN(C)C)C(=O)c3ccc4ccccc4c3)sc2c1         | 0.509090909090909
-       775269 | Cc1ccc2nc(N(CCCN(C)C)C(=O)CCc3ccccc3)sc2c1.Cl         | 0.508771929824561
-      1045663 | Cc1ccc2nc(N(CCCN(C)C)C(=O)COc3ccc(Cl)cc3)sc2c1        |               0.5
-      1015485 | Cc1ccc2nc(N(Cc3cccnc3)C(=O)Cc3ccccc3)sc2c1            |               0.5
+    chembl_25=# select * from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1') limit 10;
+     molregno |                                m                         |    similarity
+    ----------+----------------------------------------------------------+-------------------
+      2138088 | CN(CC(=O)O)c1nc2ccc([N+](=O)[O-])cc2s1                   | 0.673913043478261
+      1040255 | CC(=O)N(CCCN(C)C)c1nc2ccc(C)cc2s1                        | 0.571428571428571
+       773946 | CC(=O)N(CCCN(C)C)c1nc2ccc(C)cc2s1.Cl                     |              0.56
+      1044892 | Cc1ccc2nc(N(CCN(C)C)C(=O)c3cc(Cl)sc3Cl)sc2c1             | 0.518518518518518
+       441378 | Cc1ccc2nc(NC(=O)CCC(=O)O)sc2c1                           | 0.510204081632653
+      1047691 | Cc1ccc(S(=O)(=O)CC(=O)N(CCCN(C)C)c2nc3ccc(C)cc3s2)cc1    | 0.509090909090909
+      1042958 | Cc1ccc2nc(N(CCN(C)C)C(=O)c3ccc4ccccc4c3)sc2c1            | 0.509090909090909
+      1015485 | Cc1ccc2nc(N(Cc3cccnc3)C(=O)Cc3ccccc3)sc2c1               |               0.5
+       994843 | Cc1ccc(S(=O)(=O)CC(=O)N(CCCN(C)C)c2nc3ccc(C)cc3s2)cc1.Cl |               0.5
+       841938 | Cc1ccc2nc(N(CCN(C)C)C(=O)c3ccc4ccccc4c3)sc2c1.Cl         |               0.5
    (10 rows)

    Time: 41.623 ms
@@ -438,40 +410,40 @@ Usually we'd like to find a sorted listed of neighbors along with the accompanyi

 By default, the minimum similarity returned with a similarity search is 0.5. This can be adjusted with the rdkit.tanimoto\_threshold (and rdkit.dice\_threshold) configuration variables:

-    chembl_23=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
+    chembl_25=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
     count
    -------
-        20
+        21
    (1 row)

    Time: 181.438 ms
-    chembl_23=# set rdkit.tanimoto_threshold=0.7;
+    chembl_25=# set rdkit.tanimoto_threshold=0.7;
    SET
    Time: 0.047 ms
-    chembl_23=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
+    chembl_25=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
     count
    -------
         0
    (1 row)

    Time: 161.228 ms
-    chembl_23=# set rdkit.tanimoto_threshold=0.6;
+    chembl_25=# set rdkit.tanimoto_threshold=0.6;
    SET
    Time: 0.045 ms
-    chembl_23=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
+    chembl_25=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
     count
    -------
-         1
+         2
    (1 row)

    Time: 184.275 ms
-    chembl_23=# set rdkit.tanimoto_threshold=0.5;
+    chembl_25=# set rdkit.tanimoto_threshold=0.5;
    SET
    Time: 0.055 ms
-    chembl_23=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
+    chembl_25=# select count(*) from get_mfp2_neighbors('Cc1ccc2nc(N(C)CC(=O)O)sc2c1');
     count
    -------
-        20
+        21
    (1 row)

    Time: 181.100 ms
@@ -480,14 +452,14 @@ By default, the minimum similarity returned with a similarity search is 0.5. Thi

 The most straightforward use of the MCS code is to find the maximum common substructure of a group of molecules:

-    chembl_23=# select fmcs(m::text) from rdk.mols join compound_records using (molregno) where doc_id=4;
+    chembl_25=# select fmcs(m::text) from rdk.mols join compound_records using (molregno) where doc_id=4;
                                      fmcs                                  
    ------------------------------------------------------------------------
     [#6](-[#6]-[#7]-[#6]-[#6](-,:[#6])-,:[#6])-,:[#6]-,:[#6]-,:[#6]-,:[#6]
    (1 row)

    Time: 31.041 ms
-    chembl_23=# select fmcs(m::text) from rdk.mols join compound_records using (molregno) where doc_id=5;
+    chembl_25=# select fmcs(m::text) from rdk.mols join compound_records using (molregno) where doc_id=5;
                                                                       fmcs                                                                   
    ------------------------------------------------------------------------------------------------------------------------------------------
     [#6]-[#6](=[#8])-[#7]-[#6](-[#6](=[#8])-[#7]1-[#6]-[#6]-[#6]-[#6]-1-[#6](=[#8])-[#7]-[#6](-[#6](=[#8])-[#8])-[#6]-[#6])-[#6](-[#6])-[#6]
@@ -497,7 +469,7 @@ The most straightforward use of the MCS code is to find the maximum common subst

 The same thing can be done with a SMILES column:

-    chembl_23=# select fmcs(canonical_smiles) from compound_structures join compound_records using (molregno) where doc_id=4;
+    chembl_25=# select fmcs(canonical_smiles) from compound_structures join compound_records using (molregno) where doc_id=4;
                                      fmcs                                  
    ------------------------------------------------------------------------
     [#6](-[#7]-[#6]-[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6])-[#6](-,:[#6])-,:[#6]
@@ -507,9 +479,9 @@ The same thing can be done with a SMILES column:

 It's also possible to adjust some of the parameters to the FMCS algorithm, though this is somewhat more painful as of this writing (the 2017\_03 release cycle). Here are a couple of examples:

-    chembl_23=# select fmcs_smiles(str,'{"Threshold":0.8}') from
-    chembl_23-#    (select string_agg(m::text,' ') as str from rdk.mols
-    chembl_23(#    join compound_records using (molregno) where doc_id=4) as str ;
+    chembl_25=# select fmcs_smiles(str,'{"Threshold":0.8}') from
+    chembl_25-#    (select string_agg(m::text,' ') as str from rdk.mols
+    chembl_25(#    join compound_records using (molregno) where doc_id=4) as str ;

                                                                               fmcs_smiles                                                                            
    ------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -517,10 +489,10 @@ It's also possible to adjust some of the parameters to the FMCS algorithm, thoug
    (1 row)

    Time: 9673.949 ms
-    chembl_23=#
-    chembl_23=# select fmcs_smiles(str,'{"AtomCompare":"Any"}') from
-    chembl_23-#    (select string_agg(m::text,' ') as str from rdk.mols
-    chembl_23(#    join compound_records using (molregno) where doc_id=4) as str ;
+    chembl_25=#
+    chembl_25=# select fmcs_smiles(str,'{"AtomCompare":"Any"}') from
+    chembl_25-#    (select string_agg(m::text,' ') as str from rdk.mols
+    chembl_25(#    join compound_records using (molregno) where doc_id=4) as str ;
                                                                                  fmcs_smiles                                                                               
    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
     [#6]-,:[#6,#7]-[#8,#6]-[#6,#7](-[#6,#8]-[#7,#6]-,:[#6,#7]-,:[#6,#7]-,:[#7,#6]-,:[#6])-[#6,#7]-[#6]-[#6](-[#8,#6]-[#6])-[#6,#7]-[#7,#6]-[#6]-,:[#6,#8]-,:[#7,#6]-,:[#6]
@@ -530,9 +502,9 @@ It's also possible to adjust some of the parameters to the FMCS algorithm, thoug

 *Note* The combination of `"AtomCompare":"Any"` and a value of `"Threshold"` that is less than 1.0 performs a very generic search and can result in very long search times. Using `"Timeout"` with this combination is recommended:

-    chembl_23=# select fmcs_smiles(str,'{"AtomCompare":"Any","CompleteRingsOnly":true,"Threshold":0.8,"Timeout":60}') from
-    chembl_23-#    (select string_agg(m::text,' ') as str from rdk.mols
-    chembl_23(#    join compound_records using (molregno) where doc_id=3) as str ;
+    chembl_25=# select fmcs_smiles(str,'{"AtomCompare":"Any","CompleteRingsOnly":true,"Threshold":0.8,"Timeout":60}') from
+    chembl_25-#    (select string_agg(m::text,' ') as str from rdk.mols
+    chembl_25(#    join compound_records using (molregno) where doc_id=3) as str ;

    WARNING:  findMCS timed out, result is not maximal
                                                                                              fmcs_smiles                                                                    
@@ -724,7 +696,7 @@ The recommended adapter for connecting to postgresql is pyscopg2 (<https://pypi.
 Here's an example of connecting to our local copy of ChEMBL and doing a basic substructure search:

    >>> import psycopg2
-    >>> conn = psycopg2.connect(database='chembl_16')
+    >>> conn = psycopg2.connect(database='chembl_25')
    >>> curs = conn.cursor()
    >>> curs.execute('select * from rdk.mols where m@>%s',('c1cccc2c1nncc2',))
    >>> curs.fetchone()
@@ -735,12 +707,12 @@ That returns a SMILES for each molecule. If you plan to do more work with the mo
    >>> curs.execute('select molregno,mol_send(m) from rdk.mols where m@>%s',('c1cccc2c1nncc2',))
    >>> row = curs.fetchone()
    >>> row
-    (9830, <read-only buffer for 0x...>)
+    (9830, <memory at 0x...>)

 These pickles can then be converted into molecules:

    >>> from rdkit import Chem
-    >>> m = Chem.Mol(str(row[1]))
+    >>> m = Chem.Mol(row[1].tobytes())
    >>> Chem.MolToSmiles(m,True)
    'CC(C)Sc1ccc(CC2CCN(C3CCN(C(=O)c4cnnc5ccccc54)CC3)CC2)cc1'