added

2026-06-03 21:44:30 +08:00 · 2010-09-07 05:43:26 +00:00
parent 514b59e12f
commit ff3fcfc3f8
3 changed files with 392 additions and 0 deletions
--- a/Contrib/M_Kossner/BaseFeatures_DIP2_NoMicrospecies.fdef
+++ b/Contrib/M_Kossner/BaseFeatures_DIP2_NoMicrospecies.fdef
@@ -0,0 +1,202 @@
+# $Id$
+#
+# FDef file contributed by Markus Kossner
+
+AtomType AcidicN	[$([NH,N-1](S(=O))(C(=O))),$([NH,N-1](C(=O))(C(=O))),$([NH,N-1](S(=O)(=O))c),$([NH2]S(=O)(=O)c)]
+AtomType AmideN [$([#7]-C(=O))]
+AtomType AmidineTripleN [$([#7]-C(=[#7]))]
+AtomType SulfonamideN [$([N;H0]S(=O)(=O))]
+AtomType NDonor [N&!H0&v3,N&!H0&+1&v4,n&H1&+0;!{AcidicN};!{AmidineTripleN}]
+AtomType NDonor [$([Nv3](-C)(-C)-C);!{AmideN};!{AmidineTripleN}] #Primary Amine but not Amide!
+AtomType NDonor [$(n[n;H1]),$(nc[n;H1]);!{AcidicN}]
+
+AtomType AcidicHydroxyl [$([O]C(=[O,S,P]))]
+AtomType ChalcDonor [O,S;H1;+0]
+DefineFeature SingleAtomDonor [{NDonor},{ChalcDonor};!{AcidicHydroxyl}]
+  Family Donor
+  Weights 1
+EndFeature
+
+# aromatic N, but not indole or pyrole or fusing two rings or tetrazole
+AtomType NAcceptor [$([N;H0]#,=[C&v4])]
+AtomType NAcceptor [n;+0;H0;!X3] #;!$([n](n))   !$([n;H1](cc)cc)
+# tertiary nitrogen adjacent to aromatic carbon
+AtomType NAcceptor [N&v3;H0;$(Nc)]
+
+# removes ether, thioether and nitro oxygen
+AtomType ChalcAcceptor [O;H0;!$(O=N-*);!$([OD2]([#6])[#6])]
+
+# Removed aromatic sulfur from ChalcAcceptor definition
+#We will not use aromatic oxygens! cf Leach, Gillet ... J Med Chem 2010,53, 539-558
+#Atomtype ChalcAcceptor [o;+0]
+
+# Hydroxyls and acids
+AtomType Hydroxyl [O;H1;v2]
+
+# F is an acceptor so long as the C has no other halogen neighbors. This is maybe
+# a bit too general, but the idea is to eliminate things like CF3
+AtomType HalogenAcceptor [F;$(F-[#6]);!$(FC[F,Cl,Br,I])]
+
+DefineFeature SingleAtomAcceptor [{Hydroxyl},{ChalcAcceptor},{NAcceptor},{HalogenAcceptor}]
+  Family Acceptor
+  Weights 1
+EndFeature
+
+# this one is NOT delightfully easy :-) 
+#We wil define some additional NegIonizable Features 
+#I later found some similar substructures in 
+# Good,Oprea J.Comput.Aided.Mol.Des (2008) 22:169-178:
+
+DefineFeature AcidicGroup [C,S,P](=[O,S])-[O;H1,H0&-1]
+  Family NegIonizable
+  Weights 1.0,1.0,1.0
+EndFeature
+
+DefineFeature AcidicN [{AcidicN};!$([nH,n-1]1cnnn1)]
+  Family NegIonizable
+  Weights 1,0,1,1,0,0
+EndFeature
+
+DefineFeature Tetrazole c1nnn[nH,n-1]1
+  Family NegIonizable
+  Weights 1,1,1,1,1
+EndFeature
+
+AtomType Carbon_NotDouble [C;!$(C=*)]
+AtomType BasicNH2 [$([N;H2&+0;!R][{Carbon_NotDouble}])]
+AtomType BasicNH1 [$([N;H1&+0;!R]([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
+AtomType BasicNH0 [$([N;H0&+0;!R]([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
+#AtomType QuatN [$([N;H0&+1]([{Carbon_NotDouble}])([{Carbon_NotDouble}])([{Carbon_NotDouble}])[{Carbon_NotDouble}])]
+
+DefineFeature BasicGroup [{BasicNH2},{BasicNH1},{BasicNH0};!$(N[a])]
+  Family PosIonizable
+  Weights 1.0
+EndFeature
+
+# 14.11.2007 (GL): add !$([N+]-[O-]) constraint so we don't match
+# nitro (or similar) groups
+DefineFeature PosN [#7;+;!$([N+]-[O-])]
+ Family PosIonizable
+ Weights 1.0
+EndFeature
+
+#Define electronegative atoms that might have an impact on pka of moieties otherwise basic?
+AtomType ElectroNegative [N,n,O,o,F,Cl,Br]
+# imidazole group can be positively charged (too promiscuous?)
+#DefineFeature Imidazole  [n,+0]1[$(cC),$([cH])][n;+0][$(cC),$([cH])][$(cC),$([cH])]1
+#  Family PosIonizable
+#  Weights 1.0,1.0,1.0,1.0,1.0
+#EndFeature
+
+# guanidine group is positively charged (too promiscuous? We do not match any aromatic systems here. That would be too promiscuous!)
+DefineFeature Guanidine [N;+0;!{AmideN}][#6;+0](=[N;+0;!{AmideN}])[N;+0;!{AmideN}]
+  Family PosIonizable
+  Weights 1.0,1.0,1.0,1.0
+EndFeature
+
+# amidine group is positively charged (too promiscuous?)
+DefineFeature Amidine [N;+0;!{AmideN}][#6]=,:[#7;+0;!$(ncn);!{AmideN}]
+  Family PosIonizable
+  Weights 1.0,1.0,1.0
+EndFeature
+
+# aromatic rings of various sizes:
+#
+# Note that with the aromatics, it's important to include the ring-size queries along with
+# the aromaticity query for two reasons:
+#   1) Much of the current feature-location code assumes that the feature point is 
+#      equidistant from the atoms defining it. Larger definitions like: a1aaaaaaaa1 will actually 
+#      match things like 'o1c2cccc2ccc1', which have an aromatic unit spread across multiple simple
+#      rings and so don't fit that requirement.
+#   2) It's *way* faster.
+# 21.1.2008 (GL): update ring membership tests to reflect corrected meaning of
+# "r" in SMARTS parser
+#
+AtomType AromR4 [a;r4,!R1&r3]
+DefineFeature Arom4 [{AromR4}]1:[{AromR4}]:[{AromR4}]:[{AromR4}]:1
+ Family Aromatic
+ Weights 1.0,1.0,1.0,1.0
+EndFeature
+AtomType AromR5 [a;r5,!R1&r4,!R1&r3]
+DefineFeature Arom5 [{AromR5}]1:[{AromR5}]:[{AromR5}]:[{AromR5}]:[{AromR5}]:1
+ Family Aromatic
+ Weights 1.0,1.0,1.0,1.0,1.0
+EndFeature
+AtomType AromR6 [a;r6,!R1&r5,!R1&r4,!R1&r3]
+DefineFeature Arom6 [{AromR6}]1:[{AromR6}]:[{AromR6}]:[{AromR6}]:[{AromR6}]:[{AromR6}]:1
+ Family Aromatic
+ Weights 1.0,1.0,1.0,1.0,1.0,1.0
+EndFeature
+AtomType AromR7 [a;r7,!R1&r6,!R1&r5,!R1&r4,!R1&r3]
+DefineFeature Arom7 [{AromR7}]1:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:[{AromR7}]:1
+ Family Aromatic
+ Weights 1.0,1.0,1.0,1.0,1.0,1.0,1.0
+EndFeature
+AtomType AromR8 [a;r8,!R1&r7,!R1&r6,!R1&r5,!R1&r4,!R1&r3]
+DefineFeature Arom8 [{AromR8}]1:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:[{AromR8}]:1
+ Family Aromatic
+ Weights 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
+EndFeature
+
+# hydrophobic features
+# any carbon that is not bonded to a polar atom is considered a hydrophobe
+# 
+# 23.11.2007 (GL): match any bond (not just single bonds); add #6 at
+#  beginning to make it more efficient
+AtomType Carbon_Polar [#6;$([#6]~[#7,#8,#9])]
+# 23.11.2007 (GL): don't match charged carbon
+AtomType Carbon_NonPolar [#6;+0;!{Carbon_Polar}]
+
+# 6.2009 (MK): aromatic c is not per se a hydrophobic atom
+#AtomType RingHphobe [R;{Hphobe}]
+AtomType Hphobe [s,S&H0&v2,Br,I,Cl,F,{Carbon_NonPolar}]
+
+# 8.2009 (MK): update  'Hydrophobes'
+DefineFeature Hphobe [{Hphobe}{Hphobe}]
+ Family Hydrophobe
+  Weights 1.0
+EndFeature
+
+# 8.2009 (MK): 
+DefineFeature Methyl [CH3]
+  Family Hydrophobe
+  Weights 1.0
+EndFeature
+
+# nitro groups in the RD code are always: *-[N+](=O)[O-]
+DefineFeature Nitro2 [N;D3;+](=O)[O-]
+  Family LumpedHydrophobe
+  Weights 1.0,1.0,1.0
+EndFeature
+
+DefineFeature ThreeWayAttach [D3,D4]
+  Family LumpedHydrophobe
+  Weights 1.0
+EndFeature
+
+DefineFeature ChainEnd [R0;D1][R0;D2]
+  Family LumpedHydrophobe
+  Weights 1.0,1.0
+EndFeature
+
+# 5.2009 (MK): update ring membership tests to reflect 'Lumped Hydrophobes' 
+AtomType Ring6 [r6,!R1&r5,!R1&r4,!R1&r3]
+DefineFeature R6_6 [{Ring6}]1[{Ring6}][{Ring6}][{Ring6}][{Ring6}][{Ring6}]1
+  Family LumpedHydrophobe
+  Weights 1.0,1.0,1.0,1.0,1.0,1.0
+EndFeature
+AtomType Ring5 [r5,!R1&r4,!R1&r3]
+DefineFeature R5_5 [{Ring5}]1[{Ring5}][{Ring5}][{Ring5}][{Ring5}]1
+  Family LumpedHydrophobe
+  Weights 1.0,1.0,1.0,1.0,1.0
+EndFeature
+AtomType Ring4 [r4,!R1&r3]
+DefineFeature H4_4 [{Ring4}]1[{Ring4}][{Ring4}][{Ring4}]1
+  Family LumpedHydrophobe
+  Weights 1.0,1.0,1.0,1.0
+EndFeature
+AtomType Ring3 [r3]
+DefineFeature R3_3 [{Ring3}]1[{Ring3}][{Ring3}]1
+  Family LumpedHydrophobe
+  Weights 1.0,1.0,1.0
+EndFeature
--- a/Contrib/M_Kossner/Frames.py
+++ b/Contrib/M_Kossner/Frames.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python2.6
+# encoding: utf-8
+# $Id$
+# original author: Markus Kossner
+
+import os,sys
+from Chem import AllChem as Chem
+
+def flatten(x):
+    """flatten(sequence) -> list
+    Returns a single, flat list which contains all elements retrieved
+    from the sequence and all nested sub-sequences (iterables).
+    Examples:
+    >>> [1, 2, [3,4], (5,6)]
+    [1, 2, [3, 4], (5, 6)]
+    >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, MyVector(8,9,10)])
+    [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]"""
+    result = []
+    for el in x:
+        if hasattr(el, "__iter__") and not isinstance(el, basestring):
+            result.extend(flatten(el))
+        else:
+            result.append(el)
+    return result
+
+def GetFrame(mol,mode='RedScaff'):
+	'''return a ganeric molecule defining the reduced scaffold of the molecule.
+	mode can be 'RedScaff' and 'Scaff'. The second mode will turn every atom to a carbon and every bond to a single bond!'''
+	ring=mol.GetRingInfo()
+	RingAtoms= flatten(ring.AtomRings())
+	NonRingAtoms=[ atom.GetIdx() for atom in mol.GetAtoms() if atom.GetIdx() not in RingAtoms ]
+	RingNeighbors=[]
+	Paths=[]
+	for NonRingAtom in NonRingAtoms:
+		for neighbor in mol.GetAtomWithIdx(NonRingAtom).GetNeighbors():
+			if neighbor.GetIdx() in RingAtoms:
+				RingNeighbors.append(NonRingAtom)
+				Paths.append([neighbor.GetIdx(),NonRingAtom]) #The ring Atoms having a non ring Nieghbor will be the start of a walk
+				break
+	PosLinkers=[x for x in NonRingAtoms if x not in RingNeighbors] #Only these Atoms are Possible Linkers of two rings
+	Framework=[ x for x in RingAtoms ]
+	Linkers=[]
+	while len(Paths)>0:
+		NewPaths=[]
+		for P in Paths:
+			if P==None:
+				print 'ooh, there is still a bug somewere '
+			else:
+				for neighbor in mol.GetAtomWithIdx(P[-1]).GetNeighbors():
+					if neighbor.GetIdx() not in P:
+						if neighbor.GetIdx() in NonRingAtoms:
+							n=P[:]
+							n.append(neighbor.GetIdx())
+							NewPaths.append(n[:])
+						elif neighbor.GetIdx() in RingAtoms:
+							n=P[:]
+							n.append(neighbor.GetIdx())
+							Linkers.append(n)
+							Framework=Framework+P[:]
+
+		Paths=NewPaths[:]
+	#print 'Linkers:',Linkers
+	#print 'RingAtoms:',RingAtoms
+	if mode=='Scaff':
+		Framework=list(set(Framework))
+		todel=[]
+		NonRingAtoms.sort(reverse=True)
+		em=Chem.EditableMol(mol)
+		BondsToAdd=[ sorted([i[0],i[-1]]) for i in Linkers ]
+		mem=[]
+		for i in BondsToAdd:
+			if i not in mem:
+				em.AddBond(i[0],i[1],Chem.BondType.SINGLE)
+				mem.append(i)
+		for i in NonRingAtoms:
+			todel.append(i)
+		for i in todel:
+			em.RemoveAtom(i)
+		m=em.GetMol()
+		return m
+
+	if mode=='RedScaff':
+		Framework=list(set(Framework))
+		todel=[]
+		NonRingAtoms.sort(reverse=True)
+		for i in NonRingAtoms:
+			if i != None:
+				if i not in Framework:
+					todel.append(i)
+		em=Chem.EditableMol(mol)
+		for i in todel:
+			em.RemoveAtom(i)
+		m=em.GetMol()
+		#===================================#
+		#Now do the flattening of atoms and bonds! 
+		#Any heavy atom will become a carbon and any bond will become a single bond!!		#
+		#===================================#
+		for atom in m.GetAtoms():                                                 #
+			atom.SetAtomicNum(6)                                                    #
+			atom.SetFormalCharge(0)                                                #
+		for bond in m.GetBonds():                                                   #
+			bond.SetBondType(Chem.BondType.SINGLE)                 #
+		Chem.SanitizeMol(m)                                                          #
+		#===================================#
+		return m
+
+if len(sys.argv) < 2:
+	print "No input file provided: Frames.py <file-to-process.sdf>"
+	sys.exit(1)
+
+suppl=Chem.SDMolSupplier(sys.argv[1])
+
+FrameDict={}
+
+for mol in suppl:
+	if mol == 'None': continue
+	try:
+		m=GetFrame(mol,mode='RedScaff')
+		if FrameDict.has_key(Chem.MolToSmiles(m)):
+			FrameDict[Chem.MolToSmiles(m)].append(mol)
+		else:
+			FrameDict[Chem.MolToSmiles(m)]=[mol,]
+	except:
+		print '--------------------------------'
+		print 'could not process the molecule with the following name:'
+		print mol.GetProp('_Name')
+
+counter=0
+w=open('frames.smi','w')
+d=Chem.SDWriter('frames.sdf')
+for key,item in FrameDict.items():
+	counter+=1
+	#d=Chem.SDWriter(str(counter)+'.sdf')
+	for i in item:
+		i.SetProp('Scaffold',key)
+		i.SetProp('Cluster',str(counter))
+		d.write(i)
+	print key,len(item)
+	w.write(key+'\n')
+w.close
+print 'number of Frames found: %d' %(counter)
--- a/Contrib/M_Kossner/README
+++ b/Contrib/M_Kossner/README
@@ -0,0 +1,49 @@
+Contributions from Markus Kossner
+
+Descriptions:
+
+#-------------------------------
+Frames.py:
+
+Basically what the code does is a walk from every Atom that is the
+neighbor of a ring away from that ring.  If we can not reach another
+ring on the way, the atom chain will be deleted. However, as soon as
+we reach another ring, the chain will become a connecting chain in the
+scaffold. Ring Atoms [found by GetRingInfo()] as well as connecting
+non-ring chains are stored in a Chem.EditableMol-Object representing
+the Scaffold of the molecule.
+
+There is a function, that can return this scaffold in two forms that
+differ in the interpretation of what a 'Scaffold' is:
+
+- with mode='RedScaff' a Scaffold will be the pure molecular core
+  ignoring bond or atom types!  this is done by running over the
+  Scaffold (...the editable mol from above) and changing any atom to
+  carbon and any bond to a single bond
+
+- with mode='Scaff' a Scaffold will be sensitive of element and bond
+  type.  With mode='Scaff' Cyclohexyl-, phenyl or pyridyl- mojeties
+  will be different ring species.  mode='RedScaff' will interpret them
+  as six-membered rings and make all of them become a cyclohexane in
+  the scaffold representation.
+
+Usage of the script:
+- simply supply a .sdf file after the script name.
+- The program will generate a .sdf file called frames.sdf containing the
+cluster ID of the frame it belongs to as a property tag.
+- Additionally the raw frames are output to 'frames.smi'. This is a
+simple enumeration of the frames detected.
+
+Please note, that the data is preliminary stored in memory. This
+means, that very large input files may cause the consumption of large
+amounts of memory at runtime!
+
+#-------------------------------
+BaseFeatures_DIP2_NoMicrospecies.fdef
+
+After I talked to Nik at the Sheffield Conference about the difficulty
+of defining Positive/Negative ionizable features by SMARTS he asked me
+to to send my approach towards the problem.  Attached you will find
+the .fdef file I use. Hopefully there is something in there that might
+help in setting up robust feature definitions.
+