diff --git a/.DS_Store b/.DS_Store index 4caf97e..b0c5ecc 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/README.md b/README.md index 9d45398..51a4b4e 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,19 @@ Then: ``` pip install -r requirements.txt ``` - -Next, change the paths to library & structures folders in utilities/paths.py + No other external software is necessary to run ScanNet. +Paths to the library, MSA & structures folders are defined in utilities/paths.py + +Docker images for cpu and gpu execution are also available from DockerHub here: + +``` +docker pull scannet +``` + +``` +docker pull scannet-gpu +``` ## Predicting binding sites without using evolutionary information diff --git a/preprocessing/.DS_Store b/preprocessing/.DS_Store index 4902c5f..5fade37 100644 Binary files a/preprocessing/.DS_Store and b/preprocessing/.DS_Store differ diff --git a/preprocessing/PDB_processing.py b/preprocessing/PDB_processing.py index 2f461c9..e7f366b 100644 --- a/preprocessing/PDB_processing.py +++ b/preprocessing/PDB_processing.py @@ -11,7 +11,7 @@ from utilities.paths import structures_folder,path_to_dssp,path_to_msms from preprocessing.protein_chemistry import list_atoms,list_atoms_types,VanDerWaalsRadii,atom_mass,atom_type_to_index,atom_to_index,index_to_type,atom_type_mass from preprocessing.protein_chemistry import residue_dictionary,hetresidue_field from preprocessing import PDBio - +from datetime import datetime #%% Functions for parsing PDB files. def is_residue(residue): @@ -268,9 +268,9 @@ def apply_DSSP(chain_obj, pdbparser=None, io=None, path_to_dssp=path_to_dssp): for atom in residue: atom.disordered_flag = 0 + hash = str(datetime.now())[-6:] name = 'tmp_' + pdb_id + \ - '_model_%s_chain_%s' % (model, chain) + '.pdb' - + '_model_%s_chain_%s' % (model, chain) + '_'+hash + '.pdb' io.save(name) diff --git a/preprocessing/PDBio.py b/preprocessing/PDBio.py index 13d3e78..5d171a1 100644 --- a/preprocessing/PDBio.py +++ b/preprocessing/PDBio.py @@ -331,7 +331,7 @@ def load_chains(pdb_id=None, if (file is None) & (pdb_id is not None): file = getPDB(pdb_id, biounit=biounit, structures_folder=structures_folder)[0] else: - pdb_id = 'abcd' + pdb_id = file.split('/')[-1].split('.')[0][-4:] if file[-4:] == '.cif': parser = mmcifparser diff --git a/utilities/paths.py b/utilities/paths.py index beb4875..31f7abb 100644 --- a/utilities/paths.py +++ b/utilities/paths.py @@ -47,7 +47,8 @@ elif mode == 'tau': initial_values_folder = model_folder + 'initial_values/' # Where initial values of the parameters for the gaussian kernels and residue-residue graph edges are stored. homology_folder = library_folder + 'baselines/homology/' # Where files are stored for homology baseline. path2hhblits = '/specific/netapp5_2/iscb/wolfson/sequence_database/hh-suite/build/bin/hhblits' # Path to hhblits binary. Not required if using ScanNet_noMSA networks. - path2sequence_database = '/specific/netapp5_2/iscb/wolfson/sequence_database/uniclust30_2018_08/uniclust30_2018_08' # Path to sequence database Not required if using ScanNet_noMSA networks. + # path2sequence_database = '/specific/netapp5_2/iscb/wolfson/sequence_database/uniclust30_2018_08/uniclust30_2018_08' # Path to sequence database Not required if using ScanNet_noMSA networks. + path2sequence_database = '/specific/netapp5_2/iscb/wolfson/sequence_database/uniclust30_2020_06/uniclust30_2020_06' # Path to sequence database Not required if using ScanNet_noMSA networks. path_to_dssp = '/specific/a/home/cc/students/cs/jeromet/Drive/Scripts/3D_Proteins/xssp-3.0.9/mkdssp' # Path to dssp binary. Only for reproducing baseline performance. path_to_msms = '/specific/a/home/cc/students/cs/jeromet/Drive/Scripts/3D_Proteins/msms/msms.x86_64Linux2.2.6.1' # Path to msms binary. Only for reproducing baseline performance. path_to_multiprot = '/home/iscb/wolfson/jeromet/MultiProt/multiprot.Linux' # Path to multiprot executable. Only relevant for homology baseline @@ -63,7 +64,8 @@ elif mode == 'tau_webserver': initial_values_folder = model_folder + 'initial_values/' # Where initial values of the parameters for the gaussian kernels and residue-residue graph edges are stored. homology_folder = library_folder + 'baselines/homology/' # Where files are stored for homology baseline. path2hhblits = '/specific/netapp5_2/iscb/wolfson/sequence_database/hh-suite/build/bin/hhblits' # Path to hhblits binary. Not required if using ScanNet_noMSA networks. - path2sequence_database = '/specific/netapp5_2/iscb/wolfson/sequence_database/uniclust30_2018_08/uniclust30_2018_08' # Path to sequence database Not required if using ScanNet_noMSA networks. + # path2sequence_database = '/specific/netapp5_2/iscb/wolfson/sequence_database/uniclust30_2018_08/uniclust30_2018_08' # Path to sequence database Not required if using ScanNet_noMSA networks. + path2sequence_database = '/specific/netapp5_2/iscb/wolfson/sequence_database/uniclust30_2020_06/uniclust30_2020_06' # Path to sequence database Not required if using ScanNet_noMSA networks. path_to_dssp = '/specific/a/home/cc/students/cs/jeromet/Drive/Scripts/3D_Proteins/xssp-3.0.9/mkdssp' # Path to dssp binary. Only for reproducing baseline performance. path_to_msms = '/specific/a/home/cc/students/cs/jeromet/Drive/Scripts/3D_Proteins/msms/msms.x86_64Linux2.2.6.1' # Path to msms binary. Only for reproducing baseline performance. path_to_multiprot = None # Path to multiprot executable. Only relevant for homology baseline diff --git a/utilities/paths_github.py b/utilities/paths_github.py index 4ea9d1f..27a3463 100644 --- a/utilities/paths_github.py +++ b/utilities/paths_github.py @@ -1,8 +1,8 @@ # List of paths to folders and binaries. All folder paths should finish with slash (/) # Paths required for prediction. -library_folder = '/path/to/ScanNet/' # Where the Github Repo is located. -structures_folder = '/path/to/PDB/' # Where pdb/mmCIF structures files are stored. +library_folder = '' # Where the Github Repo is located. +structures_folder = library_folder + 'PDB/' # Where pdb/mmCIF structures files are stored. predictions_folder = library_folder + 'predictions/' # Output folder. model_folder = library_folder + 'models/' # Where the networks as stored as pairs of files (.h5,.data).