mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
Ifg contribution (#1813)
* ifg contribution * ifg python file * Delete settings.json * Update README
This commit is contained in:
committed by
Greg Landrum
parent
b1fc9cd8e6
commit
9d72e645fa
33
Contrib/IFG/README
Normal file
33
Contrib/IFG/README
Normal file
@@ -0,0 +1,33 @@
|
||||
IFG Algorithm.
|
||||
|
||||
Implementation of
|
||||
|
||||
An algorithm to identify functional groups in organic molecules
|
||||
Peter Ertl
|
||||
|
||||
https://jcheminf.springeropen.com/articles/10.1186/s13321-017-0225-z
|
||||
|
||||
|
||||
Authors:
|
||||
Richard Hall,
|
||||
Guillaume Godin modified function output to be more readable
|
||||
|
||||
|
||||
Usage:
|
||||
```python
|
||||
# return the list of IFG (atomIds, atoms & type) for a molecule object:
|
||||
m = Chem.MolFromSmiles(smiles)
|
||||
fgs = identify_functional_groups(m)
|
||||
print fgs
|
||||
```
|
||||
Output example:
|
||||
[IFG(atomIds=(2,), atoms='n', type='cnc'),
|
||||
IFG(atomIds=(4, 5, 6, 7), atoms='NS(=O)=O', type='cNS(c)(=O)=O'),
|
||||
IFG(atomIds=(12,), atoms='N', type='cN'),
|
||||
IFG(atomIds=(15,), atoms='n', type='cnc')]
|
||||
|
||||
Notes:
|
||||
|
||||
This implementation of Ertl paper was made by Richard Hall in summer 2017.
|
||||
Cause RDKit and Peter tool can have distinct aromaticity dectection behaviours, list
|
||||
of functionnal groups in aromatic rings may differ in presence of conjugated aromatics rings.
|
||||
96
Contrib/IFG/ifg.py
Normal file
96
Contrib/IFG/ifg.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# Richard hall 2017
|
||||
# IFG main code
|
||||
# Guillaume Godin 2017
|
||||
# refine output function
|
||||
# astex_ifg: identify functional groups a la Ertl, J. Cheminform (2017) 9:36
|
||||
from rdkit import Chem
|
||||
from collections import namedtuple
|
||||
|
||||
def merge(mol, marked, aset):
|
||||
bset = set()
|
||||
for idx in aset:
|
||||
atom = mol.GetAtomWithIdx(idx)
|
||||
for nbr in atom.GetNeighbors():
|
||||
jdx = nbr.GetIdx()
|
||||
if jdx in marked:
|
||||
marked.remove(jdx)
|
||||
bset.add(jdx)
|
||||
if not bset:
|
||||
return
|
||||
merge(mol, marked, bset)
|
||||
aset.update(bset)
|
||||
|
||||
# atoms connected by non-aromatic double or triple bond to any heteroatom
|
||||
# c=O should not match (see fig1, box 15). I think using A instead of * should sort that out?
|
||||
PATT_DOUBLE_TRIPLE = Chem.MolFromSmarts('A=,#[!#6]')
|
||||
# atoms in non aromatic carbon-carbon double or triple bonds
|
||||
PATT_CC_DOUBLE_TRIPLE = Chem.MolFromSmarts('C=,#C')
|
||||
# acetal carbons, i.e. sp3 carbons connected to tow or more oxygens, nitrogens or sulfurs; these O, N or S atoms must have only single bonds
|
||||
PATT_ACETAL = Chem.MolFromSmarts('[CX4](-[O,N,S])-[O,N,S]')
|
||||
# all atoms in oxirane, aziridine and thiirane rings
|
||||
PATT_OXIRANE_ETC = Chem.MolFromSmarts('[O,N,S]1CC1')
|
||||
|
||||
PATT_TUPLE = (PATT_DOUBLE_TRIPLE, PATT_CC_DOUBLE_TRIPLE, PATT_ACETAL, PATT_OXIRANE_ETC)
|
||||
|
||||
def identify_functional_groups(mol):
|
||||
marked = set()
|
||||
#mark all heteroatoms in a molecule, including halogens
|
||||
for atom in mol.GetAtoms():
|
||||
if atom.GetAtomicNum() not in (6,1): # would we ever have hydrogen?
|
||||
marked.add(atom.GetIdx())
|
||||
|
||||
#mark the four specific types of carbon atom
|
||||
for patt in PATT_TUPLE:
|
||||
for path in mol.GetSubstructMatches(patt):
|
||||
for atomindex in path:
|
||||
marked.add(atomindex)
|
||||
|
||||
#merge all connected marked atoms to a single FG
|
||||
groups = []
|
||||
while marked:
|
||||
grp = set([marked.pop()])
|
||||
merge(mol, marked, grp)
|
||||
groups.append(grp)
|
||||
|
||||
#extract also connected unmarked carbon atoms
|
||||
ifg = namedtuple('IFG', ['atomIds', 'atoms', 'type'])
|
||||
ifgs = []
|
||||
for g in groups:
|
||||
uca = set()
|
||||
for atomidx in g:
|
||||
for n in mol.GetAtomWithIdx(atomidx).GetNeighbors():
|
||||
if n.GetAtomicNum() == 6:
|
||||
uca.add(n.GetIdx())
|
||||
ifgs.append(ifg(atomIds=tuple(list(g)), atoms=Chem.MolFragmentToSmiles(mol, g, canonical=True), type=Chem.MolFragmentToSmiles(mol, g.union(uca),canonical=True)))
|
||||
return ifgs
|
||||
|
||||
def main():
|
||||
for ix, smiles in enumerate([
|
||||
'Cc1nc(NS(=O)(=O)c2ccc(N)cc2)nc(C)c1', # fig1, 1
|
||||
'NC(=N)c1ccc(C=Cc2ccc(cc2O)C(=N)N)cc1', # 2
|
||||
'CC(=O)Nc1nnc(s1)S(=O)(=O)N', # 3
|
||||
'NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1Cl', # 4
|
||||
'CNC1=Nc2ccc(Cl)cc2C(=N(=O)C1)c3ccccc3', # 5
|
||||
'Cc1onc(c1C(=O)NC2C3SC(C)(C)C(N3C2=O)C(=O)O)c4ccccc4', # 6
|
||||
'Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O', # 7
|
||||
'COc1cc(cc(C(=O)NCC2CCCN2CC=C)c1OC)S(=O)(=O)N', # 8
|
||||
'Cc1ccc(Cl)c(Nc2ccccc2C(=O)O)c1Cl', # 9
|
||||
'Clc1ccc2Oc3ccccc3N=C(N4CCNCC4)c2c1', # 10 - there is a discrepancy with the paper here! I wonder if Peter has the ring as aromatic?
|
||||
'FC(F)(F)CN1C(=O)CN=C(c2ccccc2)c3cc(Cl)ccc13', # 11
|
||||
'OCC1OC(CC1O)n2cnc3C(O)CNC=Nc32', # 12
|
||||
'CCNC1CC(C)S(=O)(=O)c2sc(cc12)S(=O)(=O)N', # 13
|
||||
'CC(O)C1C2C(C)C(=C(N2C1=O)C(=O)O)SC3CNC(C3)C(=O)N(C)C', # 14
|
||||
'CC1CN(CC(C)N1)c2c(F)c(N)c3c(=O)c(cn(C4CC4)c3c2F)C(=O)O', # 15
|
||||
'CC(=CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3)C', # 16
|
||||
'Clc1ccc2N=C3NC(=O)CN3Cc2c1Cl', # 17
|
||||
'CC(=O)NC1C(NC(=N)N)C=C(OC1C(O)C(O)CO)C(=O)O', # 18
|
||||
'CC(O)C(O)C1CNc2nc(N)nc(O)c2N1', # 19
|
||||
'NC1CCCCN(C1)c2c(Cl)cc3c(=O)c(cn(C4CC4)c3c2Cl)C(=O)O', # 20
|
||||
]):
|
||||
m = Chem.MolFromSmiles(smiles)
|
||||
fgs = identify_functional_groups(m)
|
||||
print '%2d: %d fgs'%(ix+1, len(fgs)), fgs
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user