Files
rdkit/Code/Fuzz/smiles_string_to_mol_fuzzer.dict
intrigus-lgtm 8cbce48c7d Update fuzzer dict (#3162)
Remove extremely long dictionary entry.
Add additional entries.
Remove erroneous spaces before quotation mark.

Co-authored-by: intrigus <abc123zeus@live.de>
2020-05-13 06:52:36 +02:00

359 lines
3.3 KiB
Plaintext

# Dictionary for the SMILES format
# https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system
# Entries are taken from the examples on the Wikipedia page
# Misc
"@"
"@@"
"%"
# Bonds
"."
"-"
"="
"#"
"$"
":"
"/"
"\\"
# Some molecules
"[H]O[H]"
"O"
"[OH2]"
"[NH4+]"
"[Ti+4]"
"[Ti++++]"
"[OH-]"
"[OH3+]"
"[Co+3]"
"[Co+++]"
# Molecules with special bonds
"O=C=O"
"C#N"
"[Ga-]$[As+]"
"[Na+].[Cl-]"
# Molecules with rings
"C1CCCCC1"
"O1CCOCC1"
"C1CCCC2C1CCCC2"
"C1CCCC2CCCCC12"
"C1=CC1"
"C=1CC1"
"C1CC=1"
"C=1CC=1"
# Ring-closure digits joining dot-separated fragments: not a ring, but nevertheless valid SMILES
"C1.C2.C12"
# Aromatic molecules
"C1=CC=CC=C1"
"C:1:C:C:C:C:C1"
"c1ccccc1"
"n1ccccc1"
"o1cccc1"
"n1c[nH]cc1"
"c1ccccc1-c2ccccc2"
# Branching
"CCC(=O)O"
"FC(F)F"
"COc(c1)cccc1C#N"
"COc(cc1)ccc1C#N"
"FC(Br)(Cl)F"
"BrC(F)(F)Cl"
"C(F)(Cl)(F)Br"
# Stereochemistry
"F/C=C/F"
"F/C=C\\F"
"C/C=C/C=C/C"
"CC1CCC/C(C)=C1/C=C/C(C)=C/C=C/C(C)=C/C=C/C=C(C)/C=C/C=C(C)/C=C/C2=C(C)/CCCC2(C)C"
"NC(C)C(=O)O"
"N[CH](C)C(=O)O"
"N[C@@H](C)C(=O)O"
"N[C@H](C)C(=O)O"
"NC(C(=O)O)C"
"N[C@H](C(=O)O)C"
"C[C@H](N)C(=O)O"
"OC(=O)[C@@H](N)C"
"OC(=O)[C@H](C)N"
"[C@@H](C)(N)C(=O)O"
# Isotopes
"[14c]1ccccc1"
"[2H]C(Cl)(Cl)Cl"
"[1H]"
"[2H]"
"[3H]"
# Invalid molecules
"C=1CC-1"
"CCC=(O)O"
"[4H]"
"[4.5H]"
"[4,5H]"
"[0H]"
"[-1H]"
# Other examples
"N#N"
"[Cu+2].[O-]S(=O)(=O)[O-]"
"CN=C=O"
"O=Cc1ccc(O)c(OC)c1COCc1cc(C=O)ccc1O"
"CC(=O)NCCC1=CNc2c1cc(OC)cc2CC(=O)NCCc1c[nH]c2ccc(OC)cc12"
"CCc(c1)ccc2[n+]1ccc3c2[nH]c4c3cccc4CCc1c[n+]2ccc3c4ccccc4[nH]c3c2cc1"
"CN1CCC[C@H]1c2cccnc2"
"CCC[C@@H](O)CC\\C=C\\C=C\\C#CC#C\\C=C\\COCCC[C@@H](O)CC/C=C/C=C/C#CC#C/C=C/CO"
"CC1=C(C(=O)C[C@@H]1OC(=O)[C@@H]2[C@H](C2(C)C)/C=C(\\C)/C(=O)OC)C/C=C\\C=C"
"O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5"
"OC[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[C@@H](O)1"
"OC[C@@H](O1)[C@@H](O)[C@H](O)[C@@H]2[C@@H]1c3c(O)c(OC)c(O)cc3C(=O)O2"
"CC(=O)OCCC(/C)=C\\C[C@H](C(C)=C)CCC=C"
"CC[C@H](O1)CC[C@@]12CCCO2"
"CC(C)[C@@]12C[C@@H]1[C@@H](C)C(=O)C2"
"OCCc1c(C)[n+](cs1)Cc2cnc(C)nc2N"
# Elements: bare symbols first, then the same symbols as bracket atoms
"H"
"He"
"Li"
"Be"
"B"
"C"
"N"
"O"
"F"
"Ne"
"Na"
"Mg"
"Al"
"Si"
"P"
"S"
"Cl"
"Ar"
"K"
"Ca"
"Sc"
"Ti"
"V"
"Cr"
"Mn"
"Fe"
"Co"
"Ni"
"Cu"
"Zn"
"Ga"
"Ge"
"As"
"Se"
"Br"
"Kr"
"Rb"
"Sr"
"Y"
"Zr"
"Nb"
"Mo"
"Tc"
"Ru"
"Rh"
"Pd"
"Ag"
"Cd"
"In"
"Sn"
"Sb"
"Te"
"I"
"Xe"
"Cs"
"Ba"
"La"
"Ce"
"Pr"
"Nd"
"Pm"
"Sm"
"Eu"
"Gd"
"Tb"
"Dy"
"Ho"
"Er"
"Tm"
"Yb"
"Lu"
"Hf"
"Ta"
"W"
"Re"
"Os"
"Ir"
"Pt"
"Au"
"Hg"
"Tl"
"Pb"
"Bi"
"Po"
"At"
"Rn"
"Fr"
"Ra"
"Ac"
"Th"
"Pa"
"U"
"Np"
"Pu"
"Am"
"Cm"
"Bk"
"Cf"
"Es"
"Fm"
"Md"
"No"
"Lr"
"Rf"
"Db"
"Sg"
"Bh"
"Hs"
"Mt"
"Ds"
"Rg"
"Cn"
"Nh"
"Fl"
"Mc"
"Lv"
"Ts"
"Og"
"Uue"
"[H]"
"[He]"
"[Li]"
"[Be]"
"[B]"
"[C]"
"[N]"
"[O]"
"[F]"
"[Ne]"
"[Na]"
"[Mg]"
"[Al]"
"[Si]"
"[P]"
"[S]"
"[Cl]"
"[Ar]"
"[K]"
"[Ca]"
"[Sc]"
"[Ti]"
"[V]"
"[Cr]"
"[Mn]"
"[Fe]"
"[Co]"
"[Ni]"
"[Cu]"
"[Zn]"
"[Ga]"
"[Ge]"
"[As]"
"[Se]"
"[Br]"
"[Kr]"
"[Rb]"
"[Sr]"
"[Y]"
"[Zr]"
"[Nb]"
"[Mo]"
"[Tc]"
"[Ru]"
"[Rh]"
"[Pd]"
"[Ag]"
"[Cd]"
"[In]"
"[Sn]"
"[Sb]"
"[Te]"
"[I]"
"[Xe]"
"[Cs]"
"[Ba]"
"[La]"
"[Ce]"
"[Pr]"
"[Nd]"
"[Pm]"
"[Sm]"
"[Eu]"
"[Gd]"
"[Tb]"
"[Dy]"
"[Ho]"
"[Er]"
"[Tm]"
"[Yb]"
"[Lu]"
"[Hf]"
"[Ta]"
"[W]"
"[Re]"
"[Os]"
"[Ir]"
"[Pt]"
"[Au]"
"[Hg]"
"[Tl]"
"[Pb]"
"[Bi]"
"[Po]"
"[At]"
"[Rn]"
"[Fr]"
"[Ra]"
"[Ac]"
"[Th]"
"[Pa]"
"[U]"
"[Np]"
"[Pu]"
"[Am]"
"[Cm]"
"[Bk]"
"[Cf]"
"[Es]"
"[Fm]"
"[Md]"
"[No]"
"[Lr]"
"[Rf]"
"[Db]"
"[Sg]"
"[Bh]"
"[Hs]"
"[Mt]"
"[Ds]"
"[Rg]"
"[Cn]"
"[Nh]"
"[Fl]"
"[Mc]"
"[Lv]"
"[Ts]"
"[Og]"
"[Uue]"