Files
rdkit/Contrib/M_Kossner/Frames.py
Greg Landrum 24f1737839 Remove a bunch of Python2-related warts (#2315)
* remove all of the "from __future__" imports

* remove the first batch of rdkit.six imports/uses

* next step of rdkit.six removal

* removing xrange, range, and some maps

* next round of removals

* next round of cleanups

* fix inchi test

* last bits of "from rdkit.six" are gone

* and the last of the six stuff is gone

* strange importlib problem
2019-03-06 20:43:49 -05:00

183 lines
6.3 KiB
Python

#!/usr/bin/python
# encoding: utf-8
# Jan 2011 (markus kossner) Cleaned up the code, added some documentation
# somwhere around Aug 2008 (markus kossner) created
#
# This script extracts the molecular framework for a database of molecules.
# You can use two modes (hard coded):
# - Scaff: The molecular frame is extracted
# - RedScaff: All linking chains between rings are deleted. The rings are directly connected.
#
# You can comment in/out the code snippets indicated by the comments
# to force each atom of the frame to be a Carbon.
#
# Usage: Frames.py <database.sdf>
# Output:
# - sd files containing all molecules belonging to one frame (1.sdf, 2.sdf etc)
# - frames.smi containing the (caninical) smiles and count of occurrence
#
import os, sys
from Chem import AllChem as Chem
def flatten(x):
"""flatten(sequence) -> list
Returns a single, flat list which contains all elements retrieved
from the sequence and all nested sub-sequences (iterables).
Examples:
>>> [1, 2, [3,4], (5,6)]
[1, 2, [3, 4], (5, 6)]
>>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, MyVector(8,9,10)])
[1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]"""
result = []
for el in x:
if hasattr(el, "__iter__") and not isinstance(el, basestring):
result.extend(flatten(el))
else:
result.append(el)
return result
def GetFrame(mol, mode='Scaff'):
'''return a ganeric molecule defining the reduced scaffold of the input mol.
mode can be 'Scaff' or 'RedScaff':
Scaff -> chop off the side chains and return the scaffold
RedScaff -> remove all linking chains and connect the rings
directly at the atoms where the linker was
'''
ring = mol.GetRingInfo()
RingAtoms = flatten(ring.AtomRings())
NonRingAtoms = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetIdx() not in RingAtoms]
RingNeighbors = []
Paths = []
for NonRingAtom in NonRingAtoms:
for neighbor in mol.GetAtomWithIdx(NonRingAtom).GetNeighbors():
if neighbor.GetIdx() in RingAtoms:
RingNeighbors.append(NonRingAtom)
#The ring Atoms having a non ring Neighbor will be the start of a walk
Paths.append([neighbor.GetIdx(), NonRingAtom])
break
PosConnectors = [x for x in NonRingAtoms if x not in RingNeighbors
] #Only these Atoms are potential starting points of a Linker chain
#print 'PosConnectors:'
#print PosConnectors
Framework = [x for x in RingAtoms]
#Start a list of pathways which we will have to walk
#print 'Path atoms:'
#print Paths
Linkers = []
while len(Paths) > 0:
NewPaths = []
for P in Paths:
if P == None:
print('ooh')
else:
for neighbor in mol.GetAtomWithIdx(P[-1]).GetNeighbors():
if neighbor.GetIdx() not in P:
if neighbor.GetIdx() in NonRingAtoms:
n = P[:]
n.append(neighbor.GetIdx())
NewPaths.append(n[:])
elif neighbor.GetIdx() in RingAtoms:
#print 'adding the following path to Framework:'
#print P
n = P[:]
n.append(neighbor.GetIdx())
Linkers.append(n)
Framework = Framework + P[:]
Paths = NewPaths[:]
#print 'Linkers:',Linkers
#print 'RingAtoms:',RingAtoms
#em.AddBond(3,4,Chem.BondType.SINGLE)
if mode == 'RedScaff':
Framework = list(set(Framework))
todel = []
NonRingAtoms.sort(reverse=True)
em = Chem.EditableMol(mol)
BondsToAdd = [sorted([i[0], i[-1]]) for i in Linkers]
mem = []
for i in BondsToAdd:
if i not in mem:
em.AddBond(i[0], i[1], Chem.BondType.SINGLE)
mem.append(i)
for i in NonRingAtoms:
todel.append(i)
for i in todel:
em.RemoveAtom(i)
m = em.GetMol()
#===================================#
# Now do the flattening of atoms and bonds!
# Any heavy atom will become a carbon and any bond will become a single bond! #
#===================================#
# for atom in m.GetAtoms(): #
# atom.SetAtomicNum(6) #
# atom.SetFormalCharge(0) #
# for bond in m.GetBonds(): #
# bond.SetBondType(Chem.BondType.SINGLE) #
# Chem.SanitizeMol(m) #
#===================================#
return m
if mode == 'Scaff':
Framework = list(set(Framework))
todel = []
NonRingAtoms.sort(reverse=True)
for i in NonRingAtoms:
if i != None:
if i not in Framework:
todel.append(i)
em = Chem.EditableMol(mol)
for i in todel:
em.RemoveAtom(i)
m = em.GetMol()
#===================================#
# Now do the flattening of atoms and bonds!
# Any heavy atom will become a carbon and any bond will become a single bond!! #
#===================================#
# for atom in m.GetAtoms(): #
# atom.SetAtomicNum(6) #
# atom.SetFormalCharge(0) #
# for bond in m.GetBonds(): #
# bond.SetBondType(Chem.BondType.SINGLE) #
# Chem.SanitizeMol(m) #
#===================================#
return m
if __name__ == '__main__':
if len(sys.argv) < 2:
print("No input file provided: Frames.py filetosprocess.ext")
sys.exit(1)
suppl = Chem.SDMolSupplier(sys.argv[1])
FrameDict = {}
for mol in suppl:
m = GetFrame(mol)
cansmiles = Chem.MolToSmiles(m, isomericSmiles=True)
if cansmiles in FrameDict:
FrameDict[cansmiles].append(mol)
else:
FrameDict[cansmiles] = [mol, ]
counter = 0
w = open('frames.smi', 'w')
for key, item in FrameDict.items():
counter += 1
d = Chem.SDWriter(str(counter) + '.sdf')
for i in item:
i.SetProp('Scaffold', key)
i.SetProp('Cluster', str(counter))
d.write(i)
print(key, len(item))
w.write(key + '\t' + str(len(item)) + '\n')
w.close
print('number of Clusters: %d' % (counter))