mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-03 21:44:30 +08:00
340 lines
11 KiB
Plaintext
340 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Fingerprint Generators"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Creating and using a fingerprint generator\n",
|
|
"\n",
|
|
"Fingerprint generators can be created by using the functions that return the type of generator desired."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{541731: 1, 574497: 1, 574498: 1, 590881: 1, 590882: 1, 590945: 1, 1590306: 3, 1590307: 3, 1590369: 1, 1590370: 2, 1590401: 2, 1590402: 1, 1592354: 1, 1592355: 2}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from rdkit import Chem\n",
|
|
"from rdkit.Chem import rdFingerprintGenerator\n",
|
|
"\n",
|
|
"mol = Chem.MolFromSmiles('CC(O)C(O)(O)C')\n",
|
|
"generator = rdFingerprintGenerator.GetAtomPairGenerator()\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(non_zero)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We can set the parameters for the fingerprint while creating the generator for it."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{574497: 1, 574498: 1, 590881: 1, 590882: 1, 590945: 1, 1590306: 3, 1590369: 1, 1590370: 2, 1590401: 2, 1590402: 1, 1592354: 1}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"generator = rdFingerprintGenerator.GetAtomPairGenerator(minDistance = 1, maxDistance = 2, includeChirality = False)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(non_zero)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We can provide the molecule dependent arguments while creating the fingerprint."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{574497: 1, 574498: 1, 590945: 1, 1590369: 1, 1590370: 2}\n",
|
|
"{590881: 1, 590882: 1, 1590306: 2, 1590401: 1, 1590402: 1}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol, fromAtoms = [1])\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(non_zero)\n",
|
|
"\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol, ignoreAtoms = [1, 5])\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(non_zero)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Types of fingerprint generators\n",
|
|
"\n",
|
|
"Currently 4 fingerprint types are supported by fingerprint generators"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Atom pair {541731: 1, 574497: 1, 574498: 1, 590881: 1, 590882: 1, 590945: 1, 1590306: 3, 1590307: 3, 1590369: 1, 1590370: 2, 1590401: 2, 1590402: 1, 1592354: 1, 1592355: 2}\n",
|
|
"Morgan {864662311: 3, 1542631284: 2, 1542633699: 1, 1741045729: 1, 2245273601: 1, 2245277810: 1, 2246728737: 2, 2782665878: 1, 2927183216: 1, 3537119515: 1, 3537123720: 1}\n",
|
|
"RDKitFingerprint {398441839: 4, 561308092: 2, 623990427: 1, 1524090560: 6, 1606685044: 2, 1636471275: 3, 1753257252: 1, 1940446997: 2, 2332326087: 1, 2880661462: 1, 2911990635: 1, 3060973103: 1, 3083228099: 1, 3473416248: 3, 3743603664: 1, 3768818763: 1, 3977409745: 3, 4274652475: 3, 4275705116: 3, 4279989780: 2}\n",
|
|
"TopologicalTorsion {4303897120: 1, 12893570080: 1, 12893831712: 2, 12893831776: 2}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"generator = rdFingerprintGenerator.GetAtomPairGenerator()\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"Atom pair\", non_zero)\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetMorganGenerator(radius = 3)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"Morgan\", non_zero)\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetRDKitFPGenerator()\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"RDKitFingerprint\", non_zero)\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetTopologicalTorsionGenerator()\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"TopologicalTorsion\", non_zero)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Invariant generators\n",
|
|
"\n",
|
|
"It is possible to use a custom invariant generators while creating fingerprints. Invariant generators provide values to be used as invariants for each atom or bond in the molecule and these values affect the generated fingerprint."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"RDKitFingerprint {1940446997: 1, 4275705116: 2}\n",
|
|
"RDKitFingerprint {578931652: 1, 2298572045: 2}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"simpleMol = Chem.MolFromSmiles('CCC')\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetRDKitFPGenerator()\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(simpleMol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"RDKitFingerprint\", non_zero)\n",
|
|
"\n",
|
|
"atomInvariantsGen = rdFingerprintGenerator.GetAtomPairAtomInvGen()\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetRDKitFPGenerator(atomInvariantsGenerator = atomInvariantsGen)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(simpleMol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"RDKitFingerprint\", non_zero)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Currently available invariants generators are:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Morgan with AtomPairAtomInvGen {33: 2, 35: 1, 36: 1, 97: 3, 523835848: 1, 618975071: 1, 2343097318: 1, 3205489706: 2, 3205489717: 1, 3205494725: 1, 3205494778: 1}\n",
|
|
"Morgan with MorganAtomInvGen {864662311: 3, 1542631284: 2, 1542633699: 1, 1741045729: 1, 2245273601: 1, 2245277810: 1, 2246728737: 2, 2782665878: 1, 2927183216: 1, 3537119515: 1, 3537123720: 1}\n",
|
|
"Morgan with MorganFeatureAtomInvGen {0: 4, 3: 3, 614176407: 1, 792807483: 1, 3205495869: 2, 3205496825: 3, 3208860345: 1}\n",
|
|
"Morgan with RDKitAtomInvGen {12: 4, 16: 3, 165450225: 1, 608338133: 1, 2705297134: 1, 3205492925: 3, 3205493174: 2}\n",
|
|
"Morgan with MorganBondInvGen {864662311: 3, 1542631284: 2, 1542633699: 1, 1741045729: 1, 2245273601: 1, 2245277810: 1, 2246728737: 2, 2782665878: 1, 2927183216: 1, 3537119515: 1, 3537123720: 1}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"atomInvariantsGen = rdFingerprintGenerator.GetAtomPairAtomInvGen()\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetMorganGenerator(radius = 3, atomInvariantsGenerator = atomInvariantsGen)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"Morgan with AtomPairAtomInvGen\", non_zero)\n",
|
|
"\n",
|
|
"atomInvariantsGen = rdFingerprintGenerator.GetMorganAtomInvGen()\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetMorganGenerator(radius = 3, atomInvariantsGenerator = atomInvariantsGen)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"# Default for Morgan FP\n",
|
|
"print(\"Morgan with MorganAtomInvGen\", non_zero)\n",
|
|
"\n",
|
|
"atomInvariantsGen = rdFingerprintGenerator.GetMorganFeatureAtomInvGen()\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetMorganGenerator(radius = 3, atomInvariantsGenerator = atomInvariantsGen)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"Morgan with MorganFeatureAtomInvGen\", non_zero)\n",
|
|
"\n",
|
|
"atomInvariantsGen = rdFingerprintGenerator.GetRDKitAtomInvGen()\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetMorganGenerator(radius = 3, atomInvariantsGenerator = atomInvariantsGen)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(\"Morgan with RDKitAtomInvGen\", non_zero)\n",
|
|
"\n",
|
|
"bondInvariantsGen = rdFingerprintGenerator.GetMorganBondInvGen()\n",
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetMorganGenerator(radius = 3, bondInvariantsGenerator = bondInvariantsGen)\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(mol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"# Default for Morgan FP\n",
|
|
"print(\"Morgan with MorganBondInvGen\", non_zero)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Custom Invariants\n",
|
|
"\n",
|
|
"It is also possible to provide custom invariants instead of using a invariants generator"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{541730: 1, 558113: 2}\n",
|
|
"{16417: 2, 16418: 1}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"generator = rdFingerprintGenerator.GetAtomPairGenerator()\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(simpleMol)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(non_zero)\n",
|
|
"\n",
|
|
"customAtomInvariants = [1, 1, 1]\n",
|
|
"fingerprint = generator.GetSparseCountFingerprint(simpleMol, customAtomInvariants = customAtomInvariants)\n",
|
|
"non_zero = fingerprint.GetNonzeroElements()\n",
|
|
"\n",
|
|
"print(non_zero)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Convenience functions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Bulk fingerprint"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|