mirror of
https://github.com/rdk/p2rank.git
synced 2026-06-04 12:44:24 +08:00
343 lines
7.6 KiB
Groovy
343 lines
7.6 KiB
Groovy
import cz.siret.prank.features.weight.WeightFun
|
|
import cz.siret.prank.program.params.Params
|
|
|
|
(params as Params).with {
|
|
|
|
/**
|
|
* define this if you want dataset program parameters to be evaluated relative to this directory
|
|
* (set absolute path or path relative to install dir, null defaults to working dir)
|
|
*/
|
|
dataset_base_dir = "./test_data"
|
|
|
|
/**
|
|
* all output of the program will be stored in subdirectories of this directory
|
|
* (set absolute path or path relative to install dir, null defaults to working dir)
|
|
*/
|
|
output_base_dir = "./test_output"
|
|
|
|
/**
|
|
* default model
|
|
* (set path relative to install_dir/models/)
|
|
*/
|
|
model = "default.model"
|
|
|
|
/**
|
|
* Random seed
|
|
*/
|
|
seed = 42
|
|
|
|
parallel = true
|
|
|
|
/**
|
|
* Number of computing threads
|
|
*/
|
|
threads = Runtime.getRuntime().availableProcessors() + 1
|
|
|
|
/**
|
|
* Number of folds to work on simultaneously
|
|
*/
|
|
crossval_threads = 1
|
|
|
|
/**
|
|
* defines which atoms around the ligand are considered to be part of the pocket
|
|
* ligands with longer min. contact distance are considered irrelevant
|
|
*/
|
|
ligand_protein_contact_distance = 4
|
|
|
|
//== FEATURES
|
|
|
|
/**
|
|
* include volsite pharmacophore properties
|
|
*/
|
|
use_volsite_features = true
|
|
|
|
extra_features = ["protrusion","bfactor"]
|
|
|
|
atom_table_features = ["ap5sasaValids","ap5sasaInvalids"] // "apRawValids","apRawInvalids","atomicHydrophobicity"
|
|
|
|
atom_table_feat_pow = 2
|
|
|
|
/**
|
|
* dummy param to preserve behaviour of older versions
|
|
*/
|
|
atom_table_feat_keep_sgn = false
|
|
|
|
residue_table_features = [] // ['aa5fact1','aa5fact2','aa5fact3','aa5fact4','aa5fact5']
|
|
|
|
protrusion_radius = 10
|
|
|
|
//== CLASSIFIERS ===================
|
|
|
|
/**
|
|
* see ClassifierOption
|
|
*/
|
|
classifier = "FastRandomForest"
|
|
|
|
meta_classifier_iterations = 5
|
|
|
|
/**
|
|
* works only with classifier "CostSensitive_RF"
|
|
*/
|
|
false_positive_cost = 2
|
|
|
|
//=== Random Forests =================
|
|
|
|
/**
|
|
* RandomForest trees
|
|
*/
|
|
rf_trees = 100
|
|
|
|
/**
|
|
* RandomForest depth limit, 0=unlimited
|
|
*/
|
|
rf_depth = 0
|
|
|
|
/**
|
|
* RandomForest feature subset size for one tree, 0=default(sqrt)
|
|
*/
|
|
rf_features = 0
|
|
|
|
/**
|
|
* number of threads used in RandomForest training (0=use value of threads param)
|
|
*/
|
|
rf_threads = 0
|
|
|
|
/**
|
|
* cutoff for joining ligand atom groups into one ligand
|
|
*/
|
|
ligand_clustering_distance = 1.7 // covalent bond length
|
|
|
|
/**
|
|
* cutoff around ligand that defines positives
|
|
*/
|
|
positive_point_ligand_distance = 2.5
|
|
|
|
/**
|
|
* points between [positive_point_ligand_distance,neutral_points_margin] will be left out from training
|
|
*/
|
|
neutral_points_margin = 5.5
|
|
|
|
mask_unknown_residues = true
|
|
|
|
/**
|
|
* chem. properties representation neighbourhood radius in A
|
|
*/
|
|
neighbourhood_radius = 8
|
|
|
|
/**
|
|
* HETATM groups that are considered cofactor and ignored
|
|
*/
|
|
ignore_het_groups = ["HOH","DOD","WAT","NAG","MAN","UNK","GLC","ABA","MPD","GOL","SO4","PO4"] as Set
|
|
|
|
/**
|
|
* positive point defining ligand types, accepted values: "relevant", "ignored", "small", "distant"
|
|
*/
|
|
positive_def_ligtypes = ["relevant"]
|
|
|
|
/**
|
|
* min. heavy atom count for ligand, other ligands ignored
|
|
*/
|
|
min_ligand_atoms = 5
|
|
|
|
point_sampler = "SurfacePointSampler"
|
|
|
|
/**
|
|
* multiplier for random point sampling
|
|
*/
|
|
sampling_multiplier = 3
|
|
|
|
/**
|
|
* solvent radius for Connolly surface
|
|
*/
|
|
solvent_radius = 1.6
|
|
|
|
/**
|
|
* Connolly point tessellation (~density) used in prediction step
|
|
*/
|
|
tessellation = 2
|
|
|
|
/**
|
|
* Connolly point tessellation (~density) used in training step
|
|
*/
|
|
train_tessellation = 2
|
|
|
|
// for grid and random sampling
|
|
point_min_distfrom_protein = 2.5
|
|
point_max_distfrom_pocket = 4.5
|
|
|
|
/* for GridPointSampler */
|
|
grid_cell_edge = 2
|
|
|
|
/**
|
|
* Restrict training set size, 0=unlimited
|
|
*/
|
|
max_train_instances = 0
|
|
|
|
weight_power = 2
|
|
weight_sigma = 2.2
|
|
weight_dist_param = 4.5
|
|
|
|
weight_function = WeightFun.Option.INV
|
|
|
|
deep_surrounding = false
|
|
|
|
/** calculate feature vectors from smooth atom feature representation
|
|
* (instead of directly from atom properties)
|
|
*/
|
|
smooth_representation = false
|
|
|
|
smoothing_radius = 4.5
|
|
|
|
average_feat_vectors = false
|
|
|
|
avg_pow = 1
|
|
|
|
point_score_pow = 2
|
|
|
|
delete_models = false
|
|
|
|
delete_vectors = true
|
|
|
|
/**
|
|
* number of random seed iterations
|
|
*/
|
|
loop = 1
|
|
|
|
/**
|
|
* keep datasets (structures and Connolly points) in memory between crossval/seedloop iterations
|
|
*/
|
|
cache_datasets = false
|
|
|
|
/**
|
|
* calculate feature importance
|
|
* available only for some classifiers
|
|
*/
|
|
feature_importances = false
|
|
|
|
/**
|
|
* produce pymol visualisations
|
|
*/
|
|
visualizations = true
|
|
|
|
/**
|
|
* visualize all surface points (not just inner pocket points)
|
|
*/
|
|
vis_all_surface = false
|
|
|
|
/**
|
|
* copy all protein pdb files to visualization folder (making visualizations portable)
|
|
*/
|
|
vis_copy_proteins = true
|
|
|
|
/**
|
|
* use strictly inner pocket points or a wider pocket neighbourhood
|
|
*/
|
|
strict_inner_points = false
|
|
|
|
/**
|
|
* crossvalidation folds
|
|
*/
|
|
folds = 5
|
|
|
|
/**
|
|
* collect evaluations for top [n+0, n+1,...] pockets (n is true pocket count)
|
|
*/
|
|
eval_tolerances = [0,1,2,4,10,99]
|
|
|
|
/**
|
|
* make own prank pocket predictions (P2RANK)
|
|
*/
|
|
predictions = false
|
|
|
|
/**
|
|
* minimum ligandability score for Connolly point to be considered ligandable
|
|
*/
|
|
pred_point_threshold = 0.4
|
|
|
|
/**
|
|
* minimum cluster size (of ligandable points) for initial clustering
|
|
*/
|
|
pred_min_cluster_size = 3
|
|
|
|
/**
|
|
* clustering distance for ligandable clusters for second phase clustering
|
|
*/
|
|
pred_clustering_dist = 5
|
|
|
|
/**
|
|
* distance to extend clusters around hotspots
|
|
*/
|
|
pred_surrounding = 3.5
|
|
|
|
/**
|
|
* cutoff distance of protein surface atoms considered as part of the pocket
|
|
*/
|
|
pred_protein_surface_cutoff = 3.5
|
|
|
|
/**
|
|
* Prefix output directory with date and time
|
|
*/
|
|
out_prefix_date = false
|
|
|
|
/**
|
|
*
|
|
*/
|
|
out_subdir = null
|
|
|
|
/**
|
|
* balance Connolly point score weight by density
|
|
*/
|
|
balance_density = false
|
|
|
|
balance_density_radius = 2
|
|
|
|
/**
|
|
* output detailed tables for all proteins, ligands and pockets
|
|
*/
|
|
log_cases = false
|
|
|
|
/**
|
|
* cutoff for protein exposed atoms calculation (distance from connolly surface is solv.radius. + surf_cutoff)
|
|
*/
|
|
surface_additional_cutoff = 1.8
|
|
|
|
/**
|
|
* take negative points from all of the protein's surface (not just decoy pockets)
|
|
*/
|
|
train_all_surface = false
|
|
|
|
/**
|
|
* if train_all_surface=true cutoff around ligand atoms to select negatives, 0=all
|
|
*/
|
|
train_lig_cutoff = 0
|
|
|
|
/**
|
|
* n, use only top-n pockets to select training instances, 0=all
|
|
*/
|
|
train_pockets = 0
|
|
|
|
/**
|
|
* clear secondary caches (protein surfaces etc.) when iterating params
|
|
*/
|
|
clear_sec_caches = true
|
|
|
|
/**
|
|
* clear primary caches (protein structures) when iterating params
|
|
*/
|
|
clear_prim_caches = false
|
|
|
|
/**
|
|
* acceptable distance between ligand center and closest protein atom for relevant ligands
|
|
*/
|
|
ligc_prot_dist = 5.5
|
|
|
|
rescorer = "WekaSumRescorer"
|
|
|
|
plb_rescorer_atomic = false
|
|
|
|
/**
|
|
* stop processing the dataset on the first unrecoverable error with a dataset item
|
|
*/
|
|
fail_fast = false
|
|
}
|