Files
rdkit/Code/GraphMol/FragCatalog/Wrap/testFrags.csv
2022-04-29 08:09:34 +02:00

9.5 KiB

1# Copyright (C) 2002 Rational Discovery LLC
2#CodeDescriptionSMARTSMOE_SMARTSNotes
3##########
4# Oxygens
5##########
6fr_C=ONumber of carbonyl O[CX3]=[OX1]C=OIncludes carboxylic acids, esters, amides, etc.
7fr_C=O_noCOONumber of carbonyl O, excluding COOH[C!$(C-[OH])]=O[C!$(C-[OH])]=OExcludes COOH but not COO- (assumes no formal charge) hydroxyls. Includes esters, amides, etc.
8fr_Al_OHNumber of aliphatic hydroxyl groups[C!$(C=O)]-[OH][C!$(C=O)]-[OH]Excludes COOH but not COO- (assumes no formal charge) hydroxyls
9fr_Ar_OHNumber of aromatic hydroxyl groupsc[OH1]c[OH1]
10fr_methoxyNumber of methoxy groups -OCH3[OX2](-[#6])-[CH3]O(-[#6])-CH3Aliphatic. Includes esters. Includes OMe groups on aromatic rings.
11fr_oximeNumber of oxime groups[CX3]=[NX2]-[OX2]C=N-O
12fr_esterNumber of esters[#6][CX3](=O)[OX2H0][#6]C(=O)O-C
13fr_Al_COONumber of aliphatic carboxylic acidsC-C(=O)[O;H1,-]C-C(=O)[OH] or C-C(=O)[O-]
14fr_Ar_COONumber of aromatic carboxylic acidsc-C(=O)[O;H1,-]c-C(=O)[OH] or c-C(=O)[O-]
15fr_COONumber of carboxylic acids[#6]C(=O)[O;H,-1][#6]C(=O)[OH] or [#6]C(=O)[O-]
16fr_COO2Number of carboxylic acids[CX3](=O)[OX1H0-,OX2H1][#6]C(=O)[OH] or [#6]C(=O)[O-]
17fr_ketoneNumber of ketones[#6][CX3](=O)[#6]
18fr_etherNumber of ether oxygens (including phenoxy)[OD2]([#6])[#6]
19fr_phenolNumber of phenols[OX2H]-c1ccccc1
20fr_aldehydeNumber of aldehydes[CX3H1](=O)[#6]
21############
22# Nitrogens
23############
24fr_quatNNumber of quaternary nitrogens[$([NX4+]),$([NX4]=*)][NX4]
25fr_NH2Number of Primary amines[NH2,nH2][NH2]Include aromatic Ns
26fr_NH1Number of Secondary amines[NH1,nH1][NH1] or [nH1]
27fr_NH0Number of Tertiary amines[NH0,nH0][NH0] or [nH0]
28fr_Ar_NNumber of aromatic nitrogensnn
29fr_Ar_NHNumber of aromatic amines[nH]
30fr_anilineNumber of anilinesc-[NX3]
31fr_ImineNumber of Imines[Nv3](=C)-[#6][Nv3](=C)-[#6]Aliphatic. Excludes aromatic sp2 nitrogens.
32fr_nitrileNumber of nitriles[NX1]#[CX2]C#N
33fr_hdrzineNumber of hydrazine groups[NX3]-[NX3][NX3]-[NX3]Includes primary, secondary hydrazines, carbazides, cyclic hydrazines. Excludes hydrazones
34fr_hdrzoneNumber of hydrazone groupsC=N-[NX3]C=N-[NX3]Includes cyclic hydrazones
35fr_nitrosoNumber of nitroso groups, excluding NO2[N!$(N-O)]=O[N!$(N-O)]=OIncludes N-N=O
36fr_N-ONumber of hydroxylamine groups[N!$(N=O)](-O)-C[N!$(N=O)](-O)-C
37fr_nitroNumber of nitro groups[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]N(=O)(O)[#6]
38#fr_nitroNumber of nitro groupsN(=O)(O)[#6]N(=O)(O)[#6]
39fr_azoNumber of azo groups[#6]-N=N-[#6][#6]-N=N-[#6]Both nitrogens must be attached to a C. Excludes azides.
40fr_diazoNumber of diazo groups[N+]#N[N+]#N
41fr_azideNumber of azide groups[$(*-[NX2-]-[NX2+]#[NX1]),$(*-[NX2]=[NX2+]=[NX1-])][#6]-N=[N+]=[N-]
42fr_amideNumber of amidesC(=O)-NC(=O)-NAny amide
43fr_priamideNumber of primary amidesC(=O)-[NH2]C(=O)-[NH2]
44fr_amidineNumber of amidine groupsC(=N)(-N)-[!#7]C(=N)(-N)-[!#7]Excludes guanidine
45fr_guanidoNumber of guanidine groupsC(=N)(N)NC(=N)(N)N
46fr_NhpyrroleNumber of H-pyrrole nitrogens[nH][nH]
47fr_imideNumber of imide groupsN(-C(=O))-C=ON(-C(=O))-C=O
48fr_isocyanNumber of isocyanatesN=C=ON=C=O
49fr_isothiocyanNumber of isothiocyanatesN=C=SN=C=S
50fr_thiocyanNumber of thiocyanatesS-C#NS-C#N
51###########
52# Halogens
53###########
54fr_halogenNumber of halogens[#9,#17,#35,#53]F or I or Cl or Br
55fr_alkyl_halideNumber of alkyl halides[CX4]-[Cl,Br,I,F]
56##########
57# Sulfurs
58##########
59fr_sulfideNumber of thioethers[SX2](-[#6])-C[SLp2](-[#6])-CAliphatic. Excludes sulfones, etc. Includes SMe groups on aromatic rings.
60fr_SHNumber of thiol groups[SH][SH]
61fr_C=SNumber of thiocarbonylC=[SX1]C=SIncludes thioamides, thioureas, etc.
62fr_sulfoneNumber of sulfone groupsS(=,-[OX1;+0,-1])(=,-[OX1;+0,-1])(-[#6])-[#6]S(=O)(=O)(-[#6])-[#6] or S(-[O-])(-[O-])(-[#6])-[#6]Or charged form. Excludes sulfonamides, sulfonyl Cl, etc..
63fr_sulfoneNumber of sulfone groupsS(=,-[OX1;+0,-1])(=,-[OX1;+0,-1])(-[#6])-[#6]S(=O)(=O)(-[#6])-[#6] or S(-[O-])(-[O-])(-[#6])-[#6]Or charged form. Excludes sulfonamides, sulfonyl Cl, etc..
64fr_sulfonamdNumber of sulfonamidesN-S(=,-[OX1;+0,-1])(=,-[OX1;+0,-1])-[#6]N-S(=O)(=O)-[#6] or N-S(-[O-])(-[O-])-[#6]Any sulfonamide
65fr_prisulfonamdNumber of primary sulfonamides[NH2]-S(=,-[OX1;+0;-1])(=,-[OX1;+0;-1])-[#6][NH2]-S(=O)(=O)-[#6] or [NH2]-S(-[O-])(-[O-])-[#6]
66##################################
67# Miscellaneous Functional Groups
68##################################
69fr_barbiturNumber of barbiturate groupsC1C(=O)NC(=O)NC1=OC1C(=O)NC(=O)NC1=O
70fr_ureaNumber of urea groupsC(=O)(-N)-NC(=O)(-N)-NIncludes cyclic ureas, hydroxyureas, barbiturates, etc.
71fr_term_acetyleneNumber of terminal acetylenesC#[CH]C#[CH]
72fr_imidazoleNumber of imidazole ringsn1cncc1n1cncc1Includes bicyclics
73fr_furanNumber of furan ringso1cccc1o1cccc1Includes bicyclics
74fr_thiopheneNumber of thiophene ringss1cccc1s1cccc1Includes bicyclics
75fr_thiazoleNumber of thiazole ringsc1scnc1c1scnc1Includes bicyclics
76fr_oxazoleNumber of oxazole ringsc1ocnc1c1ocnc1Includes bicyclics
77fr_pyridineNumber of pyridine ringsn1ccccc1n1ccccc1Includes bicyclics
78fr_piperdineNumber of piperidine ringsN1CCCCC1N1CCCCC1
79fr_piperzineNumber of piperazine ringsN1CCNCC1N1CCNCC1
80fr_morpholineNumber of morpholine ringsO1CCNCC1O1CCNCC1
81fr_lactamNumber of beta lactamsN1C(=O)CC1N1C(=O)CC1
82fr_lactoneNumber of cyclic esters (lactones)[C&R1](=O)[O&R1][C&R1]
83#fr_intrahbondsNumber of intramolecular H-bonds (o-OH and -C=O) + (o-NH2 and -COOH) + 8-OH/NH2 in quinolonesc1([OH])c(C(=O))aaaa1 or c1([OH])c([NH2])aaaa1 or c1([NH2])c(C(=O)[OH])aaaa1 or n1c2c([OH])cccc2ccc1 or n1c2c([NH2])cccc2ccc1This is just a guess and may be missing other possible examples
84fr_tetrazoleNumber of tetrazole ringsc1nnnn1c1nnnn1
85fr_epoxideNumber of epoxide ringsO1CC1O1CC1
86fr_unbrch_alkaneNumber of unbranched alkanes of at least 4 members (excludes halogenated alkanes)[R0;D2][R0;D2][R0;D2][R0;D2]
87fr_bicyclicBicyclic[R2][R2]
88fr_benzeneNumber of benzene ringsc1ccccc1
89#############
90# Phosphates
91#############
92fr_phos_acidNumber of phosphoric acid groups[$(P(=[OX1])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)]),$([P+]([OX1-])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)])]
93fr_phos_esterNumber of phosphoric ester groups[$(P(=[OX1])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)]),$([P+]([OX1-])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)])]
94#####################
95# Topliss Metabolism
96#####################
97fr_nitro_aromNumber of nitro benzene ring substituents[$(c1(-[$([NX3](=O)=O),$([NX3+](=O)[O-])])ccccc1)]
98#fr_nitro_aromNumber of nitro aromatic ring substituentsc-[$([NX3](=O)=O),$([NX3+](=O)[O-])]
99fr_nitro_arom_nonorthoNumber of non-ortho nitro benzene ring substituents[$(c1(-[$([NX3](=O)=O),$([NX3+](=O)[O-])])ccccc1);!$(cc-!:*)]
100fr_dihydropyridineNumber of dihydropyridines[$([NX3H1]1-C=C-C-C=C1),$([Nv3]1=C-C-C=C-C1),$([Nv3]1=C-C=C-C-C1),$([NX3H1]1-C-C=C-C=C1)]
101#fr_readily_oxidizedNumber of readily oxidized moieties: thiols+dihydropyridines
102fr_phenol_noOrthoHbondNumber of phenolic OH excluding ortho intramolecular Hbond substituents[$(c1(-[OX2H])ccccc1);!$(cc-!:[CH2]-[OX2H]);!$(cc-!:C(=O)[O;H1,-]);!$(cc-!:C(=O)-[NH2])]
103fr_Al_OH_noTertNumber of aliphatic hydroxyl groups excluding tert-OH[$(C-[OX2H]);!$([CX3](-[OX2H])=[OX1]);!$([CD4]-[OX2H])]
104fr_benzodiazepineNumber of benzodiazepines with no additional fused rings[c&R2]12[c&R1][c&R1][c&R1][c&R1][c&R2]1[N&R1][C&R1][C&R1][N&R1]=[C&R1]2
105fr_para_hydroxylationNumber of para-hydroxylation sites[$([cH]1[cH]cc(c[cH]1)~[$([#8,$([#8]~[H,c,C])])]),$([cH]1[cH]cc(c[cH]1)~[$([#7X3,$([#7](~[H,c,C])~[H,c,C])])]),$([cH]1[cH]cc(c[cH]1)-!:[$([NX3H,$(NC(=O)[H,c,C])])])]naIncludes benzofuran and benzimidazole
106#fr_para_hydroxylationNumber of para-hydroxylation sites[$([cH]1[cH]cc(c[cH]1)-!:[$([OX2,$(O-[H,c,C])])]),$([cH]1[cH]cc(c[cH]1)-!:[$([NX3,$(N([H,c,C])-[H,c,C])])]),$([cH]1[cH]cc(c[cH]1)-!:[$([NX3H,$(NC(=O)[H,c,C])])])]
107fr_allylic_oxidNumber of allylic oxidation sites excluding steroid dienone[$(C=C-C);!$(C=C-C-[N,O,S]);!$(C=C-C-C-[N,O]);!$(C12=CC(=O)CCC1C3C(C4C(CCC4)CC3)CC2)]
108fr_aryl_methylNumber of aryl methyl sites for hydroxylation[$(a-[CH3]),$(a-[CH2]-[CH3]),$(a-[CH2]-[CH2]~[!N;!O]);!$(a(:a!:*):a!:*)]
109fr_Ndealkylation1Number of XCCNR groups[$(N(-[CH3])-C-[$(C~O),$(C-a),$(C-N),$(C=C)]),$(N(-[CH2][CH3])-C-[$(C~O),$(C-a),$(C-N),$(C=C)])]
110fr_Ndealkylation2Number of tert-alicyclic amines (no heteroatoms, not quinine-like bridged N)[$([N&R1]1(-C)CCC1),$([N&R1]1(-C)CCCC1),$([N&R1]1(-C)CCCCC1),$([N&R1]1(-C)CCCCCC1),$([N&R1]1(-C)CCCCCCC1)]
111fr_alkyl_carbamateNumber of alkyl carbamates (subject to hydrolysis)C[NH1]C(=O)OC
112fr_ketone_ToplissNumber of ketones excluding diaryl, a,b-unsat. dienones, heteroatom on Calpha[$([CX3](=[OX1])(C)([c,C]));!$([CX3](=[OX1])([CH1]=C)[c,C])]
113fr_ArNNumber of N functional groups attached to aromatics[$(a-[NX3H2]),$(a-[NH1][NH2]),$(a-C(=[OX1])[NH1][NH2]),$(a-C(=[NH])[NH2])]
114fr_HOCCNNumber of C(OH)CCN-Ctert-alkyl or C(OH)CCNcyclic[$([OX2H1][CX4][CX4H2][NX3&R1]),$([OH1][CX4][CX4H2][NX3][CX4](C)(C)C)]
115###############
116# Toxicophores
117###############