mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
568 lines
26 KiB
Python
568 lines
26 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
Implementation of Ertl's Functional Group Algorithm in RDKit
|
|
|
|
Ertl, Peter.
|
|
|
|
An algorithm to identify functional groups in organic molecules
|
|
|
|
J Cheminform (2017) 9:36; DOI: 10.1186/s13321-017-0225-z
|
|
|
|
@author: gonzalo.colmenarejo
|
|
"""
|
|
|
|
|
|
from rdkit import Chem
|
|
import re
|
|
from rdkit.Chem.Draw import rdMolDraw2D
|
|
|
|
|
|
def psmi_can(s):
|
|
# Generation of canonical pseudosmiles from pseudosmiles
|
|
s = s.replace("[R]","[Ge]").replace("[Car]", "[Pb]").replace("[Cal]", "[Sn]").replace("[Oar]", "[Po]").replace("[Nar]", "[Sb]").replace("[Sar]","[Re]")
|
|
s = s.replace("[Nar+]", "[Sb+]").replace("[Sar+]","[Re+]").replace("[Sear]","[Bi]").replace("[Tear]","[Tl]").replace("[Oar+]", "[Po+]").replace("[Nar+]", "[Sb+]")
|
|
s = s.replace("[Nar-]", "[Sb-]").replace("[Sar+]","[Re+]").replace("[Sar-]","[Re-]").replace("[Sear+]","[Bi+]").replace("[Sear-]","[Bi-]")
|
|
mol = Chem.MolFromSmiles(s)
|
|
s = Chem.MolToSmiles(mol, canonical = True, isomericSmiles = False)
|
|
s = s.replace("[Ge]","[R]").replace("[Pb]", "[Car]").replace("[Sn]", "[Cal]").replace("[Po]", "[Oar]").replace("[Sb]", "[Nar]").replace("[Re]","[Sar]")
|
|
s = s.replace("[Sb+]", "[Nar+]").replace("[Re+]","[Sar+]").replace("[Bi]","[Sear]").replace("[Tl]","[Tear]").replace("[Po+]", "[Oar+]").replace("[Sb+]", "[Nar+]")
|
|
s = s.replace("[Nar-]", "[Sb-]").replace("[Re+]","[Sar+]").replace("[Re-]","[Sar-]").replace("[Bi+]","[Sear+]").replace("[Bi-]","[Sear-]")
|
|
return s
|
|
|
|
|
|
def cval_fix(fragment):
|
|
# Fix valences of carbons
|
|
for atom in fragment.GetAtoms():
|
|
if atom.GetSymbol() == 'C':
|
|
charge = atom.GetFormalCharge()
|
|
num_bonds = sum([bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])
|
|
implicit_h = atom.GetNumImplicitHs()
|
|
explicit_h = atom.GetNumExplicitHs()
|
|
total_valence = num_bonds + implicit_h + explicit_h
|
|
if charge == 0:
|
|
if total_valence != 4:
|
|
explicit_h = 4 - num_bonds - implicit_h
|
|
atom.SetNumExplicitHs(int(explicit_h))
|
|
else:
|
|
explicit_h = 4 - num_bonds - implicit_h + charge
|
|
atom.SetNumExplicitHs(int(explicit_h))
|
|
atom.UpdatePropertyCache()
|
|
return fragment
|
|
|
|
|
|
def merge(mol, marked, aset):
|
|
# Merge initially marged atoms into FGs
|
|
bset = set()
|
|
for idx in aset:
|
|
atom = mol.GetAtomWithIdx(idx)
|
|
for nbr in atom.GetNeighbors():
|
|
jdx = nbr.GetIdx()
|
|
if jdx in marked:
|
|
marked.remove(jdx)
|
|
bset.add(jdx)
|
|
if not bset:
|
|
return
|
|
merge(mol, marked, bset)
|
|
aset.update(bset)
|
|
|
|
|
|
def get_fgs(mol):
|
|
# generation of nondecorated FGs
|
|
het23c_pattern = Chem.MolFromSmarts('A=,#[!#6]')
|
|
#het23c_pattern = Chem.MolFromSmarts('[C,c]=,#[!#6]')
|
|
c23c_pattern = Chem.MolFromSmarts('C=,#[C,c]')
|
|
acetal_pattern = Chem.MolFromSmarts('[CX4](-[O,N,S])-[O,N,S]')
|
|
oxirane_pattern = Chem.MolFromSmarts('[O,N,S]1CC1')
|
|
|
|
marked = set()
|
|
for atom in mol.GetAtoms():
|
|
if ((atom.GetAtomicNum() not in (6,1)) and (atom.GetIsAromatic() is False)):
|
|
marked.add(atom.GetIdx())
|
|
|
|
for pattern in [het23c_pattern, c23c_pattern, acetal_pattern, oxirane_pattern]:
|
|
for path in mol.GetSubstructMatches(pattern):
|
|
for atomindex in path:
|
|
marked.add(atomindex)
|
|
|
|
groups = []
|
|
while marked:
|
|
grp = set([marked.pop()])
|
|
merge(mol, marked, grp)
|
|
groups.append(grp)
|
|
|
|
for atom in mol.GetAtoms():
|
|
if ((atom.GetAtomicNum() not in (6,1)) and (atom.GetIsAromatic() is True)):
|
|
lone_atom = True
|
|
for nbr in atom.GetNeighbors():
|
|
if (nbr.GetIsAromatic() == False): # Don't merge adjacent aromatic atoms into the same FG!!
|
|
for group in groups:
|
|
if nbr.GetIdx() in group:
|
|
group.add(atom.GetIdx())
|
|
lone_atom = False
|
|
if (lone_atom):
|
|
groups.append({atom.GetIdx()})
|
|
return groups
|
|
|
|
|
|
def get_carbonyl_envs(mol, functional_groups):
|
|
# Find carbonyl environments
|
|
carbonyl_pattern = Chem.MolFromSmarts('C=O')
|
|
pats_idx = mol.GetSubstructMatches(carbonyl_pattern) # tupla of sets of matched atoms
|
|
carbonyl_envs = []
|
|
for group in functional_groups:
|
|
carbonyl_env = set()
|
|
for pat_idx in pats_idx:
|
|
neigh_list = []
|
|
if (len(set(pat_idx) & group) > 0): # identify the carbonyl corresponding in the group to that SMARTS match
|
|
c_atom = mol.GetAtomWithIdx(pat_idx[0]) # Get the C of the carbonyl
|
|
neigh_list = [((pat_idx[0], x.GetIdx()), mol.GetBondBetweenAtoms(pat_idx[0], x.GetIdx()))
|
|
for x in c_atom.GetNeighbors() if ((x.GetIdx() not in group) and (x.GetAtomicNum() == 6))]
|
|
if (len(neigh_list) > 0):
|
|
carbonyl_env.update(neigh_list) #
|
|
carbonyl_envs.append(carbonyl_env)
|
|
return carbonyl_envs
|
|
|
|
|
|
def single_ns_group(mol, group):
|
|
# Find single N, S atom groups
|
|
ns_count = 0
|
|
het_count = 0
|
|
|
|
for idx in group:
|
|
atom = mol.GetAtomWithIdx(idx)
|
|
if ((atom.GetSymbol() == 'N') or (atom.GetSymbol() == 'S')):
|
|
ns_count += 1
|
|
if atom.GetSymbol() not in ('C', 'H'):
|
|
het_count += 1
|
|
|
|
return ns_count == 1 and het_count == 1
|
|
|
|
|
|
def get_freeval_envs(mol, functional_groups):
|
|
# Find environments for heteroatoms with free valences
|
|
oh_pattern = Chem.MolFromSmarts('[OX2H]')
|
|
n_pattern = Chem.MolFromSmarts('[N;X3;H1,H2]')
|
|
sh_pattern = Chem.MolFromSmarts('[SX2H]')
|
|
|
|
# Identify atoms to preserve (those Hs in OH, NH2, and SH groups)
|
|
preserve_hs = set()
|
|
for pattern in [oh_pattern, n_pattern, sh_pattern]:
|
|
matches = mol.GetSubstructMatches(pattern)
|
|
if (len(matches) > 0): # found matches
|
|
for m in matches:
|
|
group = [g for g in functional_groups if m[0] in g][0]
|
|
if ((pattern == oh_pattern) or (single_ns_group(mol, group))): # we apply the oh pattern always but the n or s patterns only with single sn groups
|
|
atom = mol.GetAtomWithIdx(m[0]) # Retrieve O, N, or S of the hydroxyl, amine, thiol
|
|
for n_atom in atom.GetNeighbors():
|
|
if (n_atom.GetAtomicNum() == 1): # Will only preserve if explicit H, if not it remains "silent"
|
|
preserve_hs.add(n_atom.GetIdx())
|
|
freeval_envs = []
|
|
for group in functional_groups:
|
|
freeval_env = set()
|
|
for idx in group:
|
|
atom = mol.GetAtomWithIdx(idx)
|
|
if ((atom.GetAtomicNum() not in [6,1]) and (atom.GetIsAromatic() == False)): # heteroatom of the group
|
|
neigh_list = [((idx, x.GetIdx()), mol.GetBondBetweenAtoms(idx, x.GetIdx()), x.GetAtomicNum())
|
|
for x in atom.GetNeighbors() if ((x.GetIdx() not in group) and (x.GetIdx() not in preserve_hs)
|
|
and (x.GetAtomicNum() in (6,1))
|
|
and (mol.GetBondBetweenAtoms(idx, x.GetIdx()).GetIsAromatic() == False))]
|
|
if (len(neigh_list) > 0):
|
|
freeval_env.update(neigh_list)
|
|
freeval_envs.append(freeval_env)
|
|
return freeval_envs, preserve_hs
|
|
|
|
|
|
def get_singleno_envs(mol, functional_groups):
|
|
# Find single N, O functional groups environments (amines vs anilines, alcohols vs phenols)
|
|
singleno_envs = []
|
|
for group in functional_groups:
|
|
singleno_env = set()
|
|
if (len(group) == 1) and (mol.GetAtomWithIdx(list(group)[0]).GetAtomicNum() in [7,8]) and not (mol.GetAtomWithIdx(list(group)[0]).GetIsAromatic()):
|
|
atom = mol.GetAtomWithIdx(list(group)[0])
|
|
if sum([1 for neigh_atom in atom.GetNeighbors() if neigh_atom.GetAtomicNum() == 6]) == 1:
|
|
for neigh_atom in atom.GetNeighbors():
|
|
if (neigh_atom.GetAtomicNum() == 6) and (mol.GetBondBetweenAtoms(atom.GetIdx(), neigh_atom.GetIdx()).GetBondType() == Chem.rdchem.BondType.SINGLE):
|
|
type_carbon = "Car" if neigh_atom.GetIsAromatic() == True else "Cal"
|
|
singleno_env.add(((list(group)[0], neigh_atom.GetIdx()),
|
|
mol.GetBondBetweenAtoms(list(group)[0], neigh_atom.GetIdx()), type_carbon))
|
|
singleno_envs.append(singleno_env)
|
|
return singleno_envs
|
|
|
|
|
|
def get_dbond2car(mol, functional_groups):
|
|
# Find atoms in functional groups double bonded to aromatic carbons
|
|
db2car_pattern = Chem.MolFromSmarts("c=*")
|
|
db2car_envs = []
|
|
matches = mol.GetSubstructMatches(db2car_pattern)
|
|
matches = tuple([x for x in matches if mol.GetBondBetweenAtoms(x[0], x[1]).GetIsAromatic() == False])
|
|
for group in functional_groups:
|
|
db2car_env = set()
|
|
for matchx in matches:
|
|
if matchx[0] in group or matchx[1] in group:
|
|
db2car_env.add(matchx[0])
|
|
db2car_envs.append(db2car_env)
|
|
return db2car_envs
|
|
|
|
|
|
def col_mol(mol, functional_groups, rad = 0.5, lw = 2, width = 300, height = 250):
|
|
# Color functional groups in molecules like Ertl's paper
|
|
cols = {"NO": (1, 0.3, 1, 0.8), # violet
|
|
"O": (1, 0.6, 0.6, 0.9), # pink
|
|
"N": (0.4, 0.7, 0.9), # blue
|
|
"X": (0, 1, 0, 0.9), # green
|
|
"har": (1, 0.65, 0, 0.9), # orange
|
|
"S(O)": (1, 1, 0.2, 0.9), # yellow
|
|
"NOS": (0.7, 0.7, 0, 0.9), # pistacchio
|
|
"CC": (0.627, 0.627, 0.627, 0.9), # grey
|
|
"P,etc": (0.5, 1, 0.8, 0.9) # cyan
|
|
}
|
|
|
|
ats_hl = {}
|
|
bds_hl = {}
|
|
rdi_hl = {}
|
|
lws_hl = {}
|
|
|
|
for fg in functional_groups:
|
|
ar = any([mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in fg])
|
|
els = "".join(sorted(list(set([mol.GetAtomWithIdx(idx).GetSymbol() for idx in fg]))))
|
|
|
|
if ar == True:
|
|
col = [cols["har"]]
|
|
elif any(x in els for x in ["F", "Cl", "Br", "I"]):
|
|
col = [cols["X"]]
|
|
elif any(x in els for x in ["P", "Se", "B", "Si", "As", "Te"]):
|
|
col = [cols["P,etc"]]
|
|
elif els in ["NO", "CNO"]:
|
|
col = [cols["NO"]]
|
|
elif els in ["O", "CO"]:
|
|
col = [cols["O"]]
|
|
elif els in ["N", "CN"]:
|
|
col = [cols["N"]]
|
|
elif els in ["S","OS","COS", "CS"]:
|
|
col = [cols["S(O)"]]
|
|
elif els in ["NOS", "CNOS", "NS", "CNS"]:
|
|
col = [cols["NOS"]]
|
|
elif els == "C":
|
|
col = [cols["CC"]]
|
|
|
|
for idx in fg:
|
|
ats_hl[idx] = col
|
|
rdi_hl[idx] = rad
|
|
|
|
if len(fg) > 1:
|
|
for bond in mol.GetBonds():
|
|
at1_idx = bond.GetBeginAtomIdx()
|
|
at2_idx = bond.GetEndAtomIdx()
|
|
if at1_idx in fg and at2_idx in fg:
|
|
bds_hl[bond.GetIdx()] = col
|
|
lws_hl[bond.GetIdx()] = lw
|
|
|
|
Chem.rdDepictor.Compute2DCoords(mol)
|
|
Chem.rdDepictor.StraightenDepiction(mol)
|
|
d2d = rdMolDraw2D.MolDraw2DCairo(width,height)
|
|
dopts = d2d.drawOptions()
|
|
dopts.atomHighlightsAreCircles = False
|
|
d2d.DrawMoleculeWithHighlights(mol, "", ats_hl, bds_hl, rdi_hl, lws_hl)
|
|
d2d.FinishDrawing()
|
|
return d2d.GetDrawingText()
|
|
|
|
|
|
|
|
|
|
def get_dec_fgs(mol, verbose = False):
|
|
# Generate functional groups decorated with carbon environments
|
|
for atom in mol.GetAtoms():
|
|
atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_UNSPECIFIED)
|
|
for bond in mol.GetBonds():
|
|
bond.SetStereo(Chem.rdchem.BondStereo.STEREONONE)
|
|
mol_aux = Chem.Mol(mol)
|
|
mol = Chem.RemoveHs(mol)
|
|
mol = Chem.AddHs(mol)
|
|
fgs = get_fgs(mol)
|
|
cnes = get_carbonyl_envs(mol, fgs)
|
|
fves, pr_hs = get_freeval_envs(mol, fgs)
|
|
noes = get_singleno_envs(mol, fgs)
|
|
db2cars = get_dbond2car(mol, fgs)
|
|
|
|
## Dummyze permitted free valence neighboring Hs
|
|
for fve in fves:
|
|
if (len(fve) > 0):
|
|
for fv in fve:
|
|
if (fv[2] == 1):
|
|
#print("dummy H: " + str(fv[0][1]))
|
|
mol.GetAtomWithIdx(fv[0][1]).SetAtomicNum(0)
|
|
|
|
## Remove explicit atoms
|
|
mol = Chem.RemoveHs(mol)
|
|
|
|
## Update fves accordingly to account for changes in numbers of dummy atoms
|
|
for i in range(len(fves)):
|
|
fve = list(fves[i])
|
|
if (len(fve) > 0):
|
|
dummy_paired = []
|
|
for j in range(len(fve)):
|
|
fv = fve[j]
|
|
if (fv[2] == 1): # this entry had a H fv
|
|
for neighbor in mol.GetAtomWithIdx(fv[0][0]).GetNeighbors():
|
|
if (neighbor.GetAtomicNum() == 0):
|
|
dummy_paired.append((fv[0][0], neighbor.GetIdx()))
|
|
# dummy_idx = neighbor.GetIdx() ## Assign the updated idx
|
|
# dummy_bond = mol.GetBondBetweenAtoms(fv[0][0], dummy_idx) # Update the bond too
|
|
# new_fv = ((fv[0][0], dummy_idx), dummy_bond, 0)
|
|
# fve[j] = new_fv
|
|
dummy_paired = list(set(dummy_paired))
|
|
dummy_count = 0
|
|
for j in range(len(fve)):
|
|
fv = fve[j]
|
|
if (fv[2] == 1):
|
|
d_pair = dummy_paired[dummy_count]
|
|
fve[j] = ((d_pair), mol.GetBondBetweenAtoms(d_pair[0], d_pair[1]), 0)
|
|
dummy_count = dummy_count + 1
|
|
fves[i] = set(fve) # reassign the possibly updated fve
|
|
|
|
psmis = []
|
|
fg_mols = []
|
|
psmi_labs = []
|
|
pseudo_smiles = ""
|
|
|
|
# Loop over functional groups to generate pseudo smiles and pseudo molecules for each of them
|
|
for i, group in enumerate(fgs):
|
|
neigh_atoms = sorted(list(set([x[0][1] for x in cnes[i]] + [x[0][1] for x in fves[i]] + [x[0][1] for x in noes[i]])))
|
|
cne = cnes[i]
|
|
fve = fves[i]
|
|
noe = noes[i]
|
|
db2car = db2cars[i]
|
|
R_idxs = sorted(list(set(neigh_atoms + [x for x in db2car])))
|
|
psmi_labs = [(list(x)[0][1], list(x)[2]) for x in noe if len(noe) > 0] + [(x, "Car") for x in db2car if len(db2car) > 0]
|
|
|
|
if (len(neigh_atoms) > 0): # There is decoration applicable
|
|
|
|
## Get the bonds of the group
|
|
bonds_idx = set()
|
|
for j in range(mol.GetNumBonds()):
|
|
bond = mol.GetBondWithIdx(j)
|
|
begin_aid = bond.GetBeginAtomIdx()
|
|
end_aid = bond.GetEndAtomIdx()
|
|
if ((begin_aid in group) and (end_aid in group)):
|
|
bonds_idx.add(bond.GetIdx())
|
|
|
|
## Add bonds to carbonyl carbons
|
|
if(len(cne) > 0):
|
|
for env in cne:
|
|
bond = env[1]
|
|
bonds_idx.add(bond.GetIdx())
|
|
|
|
## Add bonds to free valence atoms
|
|
if(len(fve) > 0):
|
|
for env in fve:
|
|
bond = env[1]
|
|
bonds_idx.add(bond.GetIdx())
|
|
|
|
## Add bonds to single n, o atoms
|
|
if(len(noe) > 0):
|
|
for env in noe:
|
|
bond = env[1]
|
|
bonds_idx.add(bond.GetIdx())
|
|
|
|
# Get the fragment as a new molecule from bonds in the FG + environment atoms
|
|
amap = {}
|
|
fragment = Chem.PathToSubmol(mol, list(bonds_idx), atomMap = amap)
|
|
|
|
# Create a copy of the fragment to modify
|
|
fragment_with_r_groups = Chem.RWMol(fragment)
|
|
|
|
# Replace atoms at the specified positions with R group labels
|
|
j = 0
|
|
for idx in R_idxs:
|
|
label = "[*:" + str(j+1) + "]"
|
|
|
|
# Create a dummy atom with the R group label
|
|
r_group_atom = Chem.MolFromSmiles(label).GetAtomWithIdx(0)
|
|
r_group_atom.SetAtomMapNum(int(label[-2])) # Set the map number to match the R group number
|
|
|
|
# Replace the atom at the specified position with the R group atom
|
|
fragment_with_r_groups.ReplaceAtom(amap[R_idxs[j]], r_group_atom)
|
|
j = j + 1
|
|
|
|
# Convert the modified fragment back to a Mol object
|
|
fragment_with_r_groups = fragment_with_r_groups.GetMol()
|
|
|
|
## Fix C valences
|
|
fragment_with_r_groups = cval_fix(fragment_with_r_groups)
|
|
|
|
# Generate the canonical pseudo-SMILES
|
|
pseudo_smiles = Chem.MolToSmiles(fragment_with_r_groups, canonical=True, isomericSmiles = False)
|
|
|
|
# Generate the labeled molecule
|
|
fragment_with_r_groups = Chem.RWMol(fragment_with_r_groups)
|
|
for atom in fragment_with_r_groups.GetAtoms():
|
|
idx = atom.GetIdx() # idx of atom in fragment
|
|
map_idx = list(amap.keys())[list(amap.values()).index(idx)] # idx of atom in original molecule
|
|
if map_idx in neigh_atoms: # Check if that position is an environment atom in original molecule
|
|
if (map_idx in [x[0][1] for x in list(noe)]):
|
|
type_carbon = [x[2] for x in list(noe) if x[0][1] == map_idx][0]
|
|
fragment_with_r_groups.GetAtomWithIdx(idx).SetProp('atomLabel', type_carbon)
|
|
else:
|
|
fragment_with_r_groups.GetAtomWithIdx(idx).SetProp('atomLabel','R')
|
|
else:
|
|
if atom.GetIsAromatic():
|
|
if atom.GetAtomicNum() == 6:
|
|
fragment_with_r_groups.GetAtomWithIdx(idx).SetProp('atomLabel', "Car")
|
|
if atom.GetAtomicNum() == 7:
|
|
fragment_with_r_groups.GetAtomWithIdx(idx).SetProp('atomLabel', "Nar")
|
|
if atom.GetAtomicNum() == 8:
|
|
fragment_with_r_groups.GetAtomWithIdx(idx).SetProp('atomLabel', "Oar")
|
|
if atom.GetAtomicNum() == 16:
|
|
fragment_with_r_groups.GetAtomWithIdx(idx).SetProp('atomLabel', "Sar")
|
|
if map_idx in db2car: # check db2caromatics
|
|
fragment_with_r_groups.GetAtomWithIdx(idx).SetProp('atomLabel', "Car")
|
|
fragment_with_r_groups = fragment_with_r_groups.GetMol()
|
|
|
|
else: # No neighbors
|
|
if(len(group) == 1): # Single-atommed FG
|
|
atom = mol.GetAtomWithIdx(list(group)[0])
|
|
charge = atom.GetFormalCharge()
|
|
charge_str = "+" if charge == 1 else "-" if charge == -1 else ""
|
|
fragment = Chem.RWMol() ## Create emtpy edtiable molecule
|
|
fragment.AddAtom(Chem.Atom('*')) ## Add dummy atom to molecule
|
|
if atom.GetIsAromatic():
|
|
if atom.GetAtomicNum() == 7:
|
|
pseudo_smiles = "n" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Nar" + charge_str)
|
|
if atom.GetAtomicNum() == 8:
|
|
pseudo_smiles = "o" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Oar" + charge_str)
|
|
if atom.GetAtomicNum() == 16:
|
|
pseudo_smiles = "s" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Sar" + charge_str)
|
|
if atom.GetAtomicNum() == 15:
|
|
pseudo_smiles = "p" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Par" + charge_str)
|
|
if atom.GetAtomicNum() == 34:
|
|
pseudo_smiles = "se" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Sear" + charge_str)
|
|
if atom.GetAtomicNum() == 52:
|
|
pseudo_smiles = "te" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Tear" + charge_str)
|
|
else:
|
|
if atom.GetAtomicNum() == 7:
|
|
pseudo_smiles = "N" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "N" + charge_str)
|
|
if atom.GetAtomicNum() == 8:
|
|
pseudo_smiles = "O" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "O" + charge_str)
|
|
if atom.GetAtomicNum() == 16:
|
|
pseudo_smiles = "S" + charge_str
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "S" + charge_str)
|
|
if atom.GetAtomicNum() == 15:
|
|
pseudo_smiles = "P" + charge_str + "]"
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "P" + charge_str)
|
|
if atom.GetAtomicNum() == 34:
|
|
pseudo_smiles = "Se" + charge_str + "]"
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Se" + charge_str)
|
|
if atom.GetAtomicNum() == 52:
|
|
pseudo_smiles = "Te" + charge_str + "]"
|
|
fragment.GetAtomWithIdx(0).SetProp('atomLabel', "Te" + charge_str)
|
|
fragment = fragment.GetMol()
|
|
else: # Multi-atommed FG
|
|
bonds_idx = []
|
|
for j in range(mol.GetNumBonds()):
|
|
bond = mol.GetBondWithIdx(j)
|
|
begin_aid = bond.GetBeginAtomIdx()
|
|
end_aid = bond.GetEndAtomIdx()
|
|
if begin_aid in group and end_aid in group:
|
|
bonds_idx.append(j)
|
|
|
|
# Get the fragment as a new molecule
|
|
amap = {}
|
|
fragment = Chem.PathToSubmol(mol, bonds_idx, atomMap = amap)
|
|
fragment = cval_fix(fragment)
|
|
pseudo_smiles = Chem.MolToSmiles(fragment, canonical=True, isomericSmiles = False)
|
|
for atom in fragment.GetAtoms():
|
|
idx = atom.GetIdx()
|
|
charge = atom.GetFormalCharge()
|
|
charge_str = "\u207A" if charge == 1 else "\u207B" if charge == -1 else ""
|
|
if atom.GetIsAromatic():
|
|
if atom.GetAtomicNum() == 6:
|
|
fragment.GetAtomWithIdx(idx).SetProp('atomLabel', "Car" + charge_str)
|
|
if atom.GetAtomicNum() == 7:
|
|
fragment.GetAtomWithIdx(idx).SetProp('atomLabel', "Nar" + charge_str)
|
|
if atom.GetAtomicNum() == 8:
|
|
fragment.GetAtomWithIdx(idx).SetProp('atomLabel', "Oar" + charge_str)
|
|
if atom.GetAtomicNum() == 16:
|
|
fragment.GetAtomWithIdx(idx).SetProp('atomLabel', "Sar" + charge_str)
|
|
|
|
# Parse the pseudo-smiles; first the Cars and Cals
|
|
if (len(psmi_labs) > 0):
|
|
for psmi_lab in list(psmi_labs):
|
|
pseudo_smiles = pseudo_smiles.replace("[*:" + str(R_idxs.index(psmi_lab[0]) + 1) + "]", "[" + psmi_lab[1] + "]")
|
|
|
|
|
|
# Define the pattern to match
|
|
pattern = r'\[\*:\d+\]'
|
|
|
|
# Define the replacement string
|
|
replacement = "[R]"
|
|
|
|
# Replace [R] groups
|
|
pseudo_smiles = re.sub(pattern, replacement, pseudo_smiles)
|
|
|
|
# Replace ns
|
|
pseudo_smiles = pseudo_smiles.replace("[n+]", "[Nar+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[n-]", "[Nar-]")
|
|
pseudo_smiles = pseudo_smiles.replace("[n+]", "[Nar+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[n-]", "[Nar-]")
|
|
pseudo_smiles = re.sub(r'(?<![MNI])n\+', '[Nar+]', pseudo_smiles)
|
|
pseudo_smiles = re.sub(r'(?<![MNI])n\-', '[Nar-]', pseudo_smiles)
|
|
pseudo_smiles = re.sub(r'(?<![MZI])n', '[Nar]', pseudo_smiles)
|
|
|
|
# Replace cs, ss, os, ps, ses, tes
|
|
pseudo_smiles = pseudo_smiles.replace("[se+]", "[Sear+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[se-]", "[Sear-]")
|
|
pseudo_smiles = pseudo_smiles.replace("se+", "[Sear+]")
|
|
pseudo_smiles = pseudo_smiles.replace("se-", "[Sear-]")
|
|
pseudo_smiles = pseudo_smiles.replace("se", "[Sear]")
|
|
pseudo_smiles = pseudo_smiles.replace("[te+]", "[Tear+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[te-]", "[Tear-]")
|
|
pseudo_smiles = pseudo_smiles.replace("te", "[Tear]")
|
|
pseudo_smiles = pseudo_smiles.replace("[c+]", "[Car+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[c-]", "[Car-]")
|
|
pseudo_smiles = pseudo_smiles.replace("c", "[Car]")
|
|
pseudo_smiles = pseudo_smiles.replace("[s+]", "[Sar+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[s-]", "[Sar-]")
|
|
pseudo_smiles = re.sub(r"(?<!A)(s\+)", "[Sar+]", pseudo_smiles) # To avoid issues with As
|
|
pseudo_smiles = re.sub(r"(?<!A)(s-)", "[Sar-]", pseudo_smiles) # To avoid issues with As
|
|
pseudo_smiles = re.sub(r"(?<!A)s", "[Sar]", pseudo_smiles) # To avoid issues with As
|
|
pseudo_smiles = pseudo_smiles.replace("o+", "[Oar+]")
|
|
pseudo_smiles = pseudo_smiles.replace("o-", "[Oar-]")
|
|
pseudo_smiles = pseudo_smiles.replace("o", "[Oar]")
|
|
pseudo_smiles = pseudo_smiles.replace("[o]", "[Oar]")
|
|
pseudo_smiles = pseudo_smiles.replace("[o+]", "[Oar+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[o-]", "[Oar-]")
|
|
pseudo_smiles = pseudo_smiles.replace("[p+]", "[Par+]")
|
|
pseudo_smiles = pseudo_smiles.replace("[p-]", "[Par-]")
|
|
pseudo_smiles = pseudo_smiles.replace("p", "[Par]")
|
|
|
|
|
|
# Final canonicalization
|
|
pseudo_smiles = psmi_can(pseudo_smiles)
|
|
|
|
psmis.append(pseudo_smiles)
|
|
if (len(neigh_atoms) == 0):
|
|
fg_mols.append(fragment)
|
|
else:
|
|
fg_mols.append(fragment_with_r_groups)
|
|
if not ("H" in pseudo_smiles):
|
|
if (verbose):
|
|
print("Canonical Pseudo-SMILES:", pseudo_smiles)
|
|
|
|
img_text = col_mol(mol_aux, fgs)
|
|
return img_text, fgs, psmis, fg_mols
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|