Added no_aminoacid_identities option (zeroes the receptor node features `data['receptor'].x` in the score model's forward pass)

This commit is contained in:
Gabriele Corso
2023-02-05 20:17:35 -05:00
parent fff8f0b5eb
commit 47bf7e5e08
7 changed files with 223 additions and 2 deletions

View File

@@ -95,7 +95,7 @@ class TensorProductScoreModel(torch.nn.Module):
center_max_distance=30, distance_embed_dim=32, cross_distance_embed_dim=32, no_torsion=False,
scale_by_sigma=True, use_second_order_repr=False, batch_norm=True,
dynamic_max_cross=False, dropout=0.0, lm_embedding_type=None, confidence_mode=False,
confidence_dropout=0, confidence_no_batchnorm=False, num_confidence_outputs=1):
confidence_dropout=0, confidence_no_batchnorm=False, num_confidence_outputs=1, no_aminoacid_identities=False):
super(TensorProductScoreModel, self).__init__()
self.t_to_sigma = t_to_sigma
self.in_lig_edge_features = in_lig_edge_features
@@ -115,6 +115,7 @@ class TensorProductScoreModel(torch.nn.Module):
self.timestep_emb_func = timestep_emb_func
self.confidence_mode = confidence_mode
self.num_conv_layers = num_conv_layers
self.no_aminoacid_identities = no_aminoacid_identities
self.lig_node_embedding = AtomEncoder(emb_dim=ns, feature_dims=lig_feature_dims, sigma_embed_dim=sigma_embed_dim)
self.lig_edge_embedding = nn.Sequential(nn.Linear(in_lig_edge_features + sigma_embed_dim + distance_embed_dim, ns),nn.ReLU(), nn.Dropout(dropout),nn.Linear(ns, ns))
@@ -232,6 +233,9 @@ class TensorProductScoreModel(torch.nn.Module):
)
def forward(self, data):
if self.no_aminoacid_identities:
data['receptor'].x = data['receptor'].x * 0
if not self.confidence_mode:
tr_sigma, rot_sigma, tor_sigma = self.t_to_sigma(*[data.complex_t[noise_type] for noise_type in ['tr', 'rot', 'tor']])
else:

View File

@@ -81,6 +81,7 @@ def parse_train_args():
parser.add_argument('--embedding_type', type=str, default="sinusoidal", help='Type of diffusion time embedding')
parser.add_argument('--sigma_embed_dim', type=int, default=32, help='Size of the embedding of the diffusion time')
parser.add_argument('--embedding_scale', type=int, default=1000, help='Parameter of the diffusion time embedding')
parser.add_argument('--no_aminoacid_identities', action='store_true', default=False, help='')
args = parser.parse_args()
return args

View File

@@ -117,7 +117,8 @@ def get_model(args, device, t_to_sigma, no_parallel=False, confidence_mode=False
confidence_mode=confidence_mode,
num_confidence_outputs=len(
args.rmsd_classification_cutoff) + 1 if 'rmsd_classification_cutoff' in args and isinstance(
args.rmsd_classification_cutoff, list) else 1)
args.rmsd_classification_cutoff, list) else 1,
no_aminoacid_identities=args.no_aminoacid_identities if "no_aminoacid_identities" in args else False)
if device.type == 'cuda' and not no_parallel:
model = DataParallel(model)

Binary file not shown.

View File

@@ -0,0 +1,120 @@
# Saved run parameters for the run named in `run_name` below
# (noAminoId_confidence_l5s24v6_FILTERFROM_restart_big_noAminoId).
# NOTE(review): presumably a confidence-model training config that filters
# samples from the score model in `original_model_dir` — confirm against the
# script that wrote this file.
affinity_prediction: false
all_atoms: false
# NOTE(review): key is spelled "asyncronous" (sic). It is a runtime identifier;
# do not correct the spelling here without also changing whatever reads it.
asyncronous_noise_schedule: false
atom_max_neighbors: 8
atom_radius: 5
balance: false
batch_size: 32
best_model_save_frequency: 5
c_alpha_max_neighbors: 24
cache_creation_id: 6
cache_ids_to_combine:
- '1'
- '2'
- '3'
- '4'
- '5'
- '6'
# NOTE(review): machine-specific absolute path; will not resolve on other hosts.
cache_path: /data/rsg/nlp/hstark/ligbind/data/cacheNew
chain_cutoff: 10
ckpt: best_ema_inference_epoch_model.pt
confidence_dropout: 0.1
# NOTE(review): key is spelled "weigth" (sic). It is a runtime identifier;
# do not correct the spelling here without also changing whatever reads it.
confidence_loss_weigth: 1
confidence_no_batchnorm: false
config: null
correct_torsion_sigmas: true
cross_distance_embed_dim: 32
cross_max_distance: 80
# NOTE(review): machine-specific absolute path; will not resolve on other hosts.
data_dir: /data/rsg/nlp/hstark/ligbind/data/PDBBind_processed/
dataloader_drop_last: false
dataset: pdbbind
dedup_func: min
different_schedules: false
distance_embed_dim: 32
dropout: 0.1
dynamic_max_cross: true
# Parameter of the diffusion time embedding. Note that the --embedding_scale
# flag in parse_train_args defaults to 1000, not 10000.
embedding_scale: 10000
# Type of diffusion time embedding.
embedding_type: sinusoidal
esm_embeddings_path: null
high_confidence_threshold: 5.0
include_confidence_prediction: false
inf_sched_alpha: 1.0
inf_sched_beta: 1.0
inference_steps: 20
limit_complexes: 0
lm_embeddings_path: null
log_dir: workdir
lr: 0.0003
main_metric: loss
main_metric_goal: min
matching_maxiter: 20
matching_popsize: 20
max_lig_size: null
max_radius: 5.0
model_save_frequency: 0
multiplicity: 1
n_epochs: 100
# When true, TensorProductScoreModel.forward multiplies data['receptor'].x
# by 0 before doing anything else. get_model forwards this value to the model
# and falls back to False when the key is absent.
no_aminoacid_identities: true
no_batch_norm: false
no_torsion: false
norm_by_sigma: false
normalize_affinity: false
not_fixed_knn_radius_graph: false
not_full_dataset: false
not_knn_only_graph: false
ns: 24
num_conformers: 1
num_conv_layers: 5
num_workers: 1
nv: 6
odd_parity: false
# NOTE(review): machine-specific absolute path; will not resolve on other hosts.
original_model_dir: /data/rsg/nlp/hstark/ligbind/workdir/restart_big_noAminoId
parallel: 1
parallel_aggregators: mean max min std
project: ligbind_filtering
protein_file: protein_processed
rank_affinity: false
rank_cutoff: null
receptor_radius: 15.0
remove_hs: true
restart_dir: null
# get_model sets num_confidence_outputs to len(cutoffs) + 1 only when this
# value is a list; a scalar such as this one yields num_confidence_outputs = 1.
rmsd_classification_cutoff: 2
rmsd_prediction: false
rot_alpha: 1
rot_beta: 1
rot_inf_sched_alpha: 1
rot_inf_sched_beta: 1
rot_sigma_schedule: expbeta
rot_sigmoid_schedule: false
run_name: noAminoId_confidence_l5s24v6_FILTERFROM_restart_big_noAminoId
samples_per_complex: 3
sampling_alpha: 1
sampling_beta: 1
scale_by_sigma: true
schedule_k: 10
schedule_m: 0.4
scheduler: plateau
scheduler_patience: 20
separate_noise_schedule: false
# Size of the embedding of the diffusion time.
sigma_embed_dim: 32
sigma_schedule: expbeta
smooth_edges: false
split_test: data/splits/timesplit_test
split_train: data/splits/timesplit_no_lig_overlap_train
split_val: data/splits/timesplit_no_lig_overlap_val
temp_psi: 0.0
temp_sampling: 1.0
temp_sigma_data: 0.5
tor_alpha: 1
tor_beta: 1
tor_inf_sched_alpha: 1
tor_inf_sched_beta: 1
tor_sigma_schedule: expbeta
tp_attention: false
tr_only_confidence: true
train_sampling: linear
transfer_weights: false
use_original_model_cache: false
use_second_order_repr: false
w_decay: 0.0
wandb: true

View File

@@ -0,0 +1,95 @@
# Saved run parameters for the run named in `run_name` below
# (restart_big_noAminoId).
# NOTE(review): presumably a score-model training run resumed from
# `restart_dir` — confirm against the script that wrote this file.
all_atoms: false
# NOTE(review): key is spelled "asyncronous" (sic). It is a runtime identifier;
# do not correct the spelling here without also changing whatever reads it.
asyncronous_noise_schedule: false
atom_max_neighbors: 8
atom_radius: 5
batch_size: 16
c_alpha_max_neighbors: 24
# NOTE(review): machine-specific absolute path; will not resolve on other hosts.
cache_path: /data/rsg/nlp/hstark/ligbind/data/cacheNew
chain_cutoff: 10
confidence_dropout: 0.0
confidence_no_batchnorm: false
confidence_weight: 0.33
config: null
cross_distance_embed_dim: 64
cross_max_distance: 80
cudnn_benchmark: true
data_dir: data/PDBBind_processed/
dataloader_drop_last: false
distance_embed_dim: 64
dropout: 0.1
dynamic_max_cross: true
ema_rate: 0.999
# Parameter of the diffusion time embedding (same value as the
# --embedding_scale default in parse_train_args).
embedding_scale: 1000
# Type of diffusion time embedding.
embedding_type: sinusoidal
esm_embeddings_path: null
high_confidence_threshold: 5.0
include_confidence_prediction: false
inf_pocket_cutoff: 5
inf_pocket_knowledge: false
inference_earlystop_goal: max
inference_earlystop_metric: valinf_rmsds_lt2
inference_steps: 20
limit_complexes: 0
# NOTE(review): machine-specific absolute path; will not resolve on other hosts.
log_dir: /data/rsg/nlp/hstark/ligbind/workdir
lr: 0.001
matching_maxiter: 20
matching_popsize: 20
max_lig_size: null
max_radius: 5.0
n_epochs: 1000
# When true, TensorProductScoreModel.forward multiplies data['receptor'].x
# by 0 before doing anything else. get_model forwards this value to the model
# and falls back to False when the key is absent.
no_aminoacid_identities: true
no_batch_norm: false
no_torsion: false
norm_by_sigma: false
not_fixed_center_conv: false
not_full_dataset: false
ns: 48
num_conformers: 1
num_conv_layers: 6
num_dataloader_workers: 1
num_inference_complexes: 500
num_workers: 1
nv: 10
odd_parity: false
pin_memory: true
pocket_mode_graph: false
project: ligbind_train
protein_file: protein_processed
receptor_radius: 15.0
remove_hs: true
# NOTE(review): machine-specific absolute path; will not resolve on other hosts.
restart_dir: /data/rsg/nlp/hstark/ligbind/workdir/big_noAminoId
restart_lr: null
rot_alpha: 1.0
rot_beta: 1.0
rot_sigma_max: 1.55
rot_sigma_min: 0.03
rot_weight: 0.33
run_name: restart_big_noAminoId
sampling_alpha: 2.0
sampling_beta: 1.0
scale_by_sigma: true
scheduler: plateau
scheduler_patience: 30
separate_noise_schedule: false
# Size of the embedding of the diffusion time (the --sigma_embed_dim default
# in parse_train_args is 32).
sigma_embed_dim: 64
smooth_edges: false
split_test: data/splits/timesplit_test
split_train: data/splits/timesplit_no_lig_overlap_train
split_val: data/splits/timesplit_no_lig_overlap_val
test_sigma_intervals: true
tor_alpha: 1.0
tor_beta: 1.0
tor_sigma_max: 3.14
tor_sigma_min: 0.03
tor_weight: 0.33
tr_only_confidence: true
tr_sigma_max: 19.0
tr_sigma_min: 0.1
tr_weight: 0.33
train_inference_freq: null
use_ema: true
use_full_size_protein_file: false
use_second_order_repr: false
val_inference_freq: 5
w_decay: 0.0
wandb: true