Add no_aminoacid_identities option (zeroes receptor node features in the score model) and masked model checkpoints

2026-06-04 18:04:23 +08:00 · 2023-02-05 20:17:35 -05:00
parent fff8f0b5eb
commit 47bf7e5e08
7 changed files with 223 additions and 2 deletions
--- a/models/score_model.py
+++ b/models/score_model.py
@@ -95,7 +95,7 @@ class TensorProductScoreModel(torch.nn.Module):
                 center_max_distance=30, distance_embed_dim=32, cross_distance_embed_dim=32, no_torsion=False,
                 scale_by_sigma=True, use_second_order_repr=False, batch_norm=True,
                 dynamic_max_cross=False, dropout=0.0, lm_embedding_type=None, confidence_mode=False,
-                 confidence_dropout=0, confidence_no_batchnorm=False, num_confidence_outputs=1):
+                 confidence_dropout=0, confidence_no_batchnorm=False, num_confidence_outputs=1, no_aminoacid_identities=False):
        super(TensorProductScoreModel, self).__init__()
        self.t_to_sigma = t_to_sigma
        self.in_lig_edge_features = in_lig_edge_features
@@ -115,6 +115,7 @@ class TensorProductScoreModel(torch.nn.Module):
        self.timestep_emb_func = timestep_emb_func
        self.confidence_mode = confidence_mode
        self.num_conv_layers = num_conv_layers
+        self.no_aminoacid_identities = no_aminoacid_identities

        self.lig_node_embedding = AtomEncoder(emb_dim=ns, feature_dims=lig_feature_dims, sigma_embed_dim=sigma_embed_dim)
        self.lig_edge_embedding = nn.Sequential(nn.Linear(in_lig_edge_features + sigma_embed_dim + distance_embed_dim, ns),nn.ReLU(), nn.Dropout(dropout),nn.Linear(ns, ns))
@@ -232,6 +233,9 @@ class TensorProductScoreModel(torch.nn.Module):
                )

    def forward(self, data):
+        if self.no_aminoacid_identities:
+            data['receptor'].x = data['receptor'].x * 0
+
        if not self.confidence_mode:
            tr_sigma, rot_sigma, tor_sigma = self.t_to_sigma(*[data.complex_t[noise_type] for noise_type in ['tr', 'rot', 'tor']])
        else:
--- a/utils/parsing.py
+++ b/utils/parsing.py
@@ -81,6 +81,7 @@ def parse_train_args():
    parser.add_argument('--embedding_type', type=str, default="sinusoidal", help='Type of diffusion time embedding')
    parser.add_argument('--sigma_embed_dim', type=int, default=32, help='Size of the embedding of the diffusion time')
    parser.add_argument('--embedding_scale', type=int, default=1000, help='Parameter of the diffusion time embedding')
+    parser.add_argument('--no_aminoacid_identities', action='store_true', default=False, help='')

    args = parser.parse_args()
    return args
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -117,7 +117,8 @@ def get_model(args, device, t_to_sigma, no_parallel=False, confidence_mode=False
                        confidence_mode=confidence_mode,
                        num_confidence_outputs=len(
                            args.rmsd_classification_cutoff) + 1 if 'rmsd_classification_cutoff' in args and isinstance(
-                            args.rmsd_classification_cutoff, list) else 1)
+                            args.rmsd_classification_cutoff, list) else 1,
+                        no_aminoacid_identities=args.no_aminoacid_identities if "no_aminoacid_identities" in args else False)

    if device.type == 'cuda' and not no_parallel:
        model = DataParallel(model)
--- a/workdir/masked_confidence_model/best_model.pt
+++ b/workdir/masked_confidence_model/best_model.pt
--- a/workdir/masked_confidence_model/model_parameters.yml
+++ b/workdir/masked_confidence_model/model_parameters.yml
@@ -0,0 +1,120 @@
+affinity_prediction: false
+all_atoms: false
+asyncronous_noise_schedule: false
+atom_max_neighbors: 8
+atom_radius: 5
+balance: false
+batch_size: 32
+best_model_save_frequency: 5
+c_alpha_max_neighbors: 24
+cache_creation_id: 6
+cache_ids_to_combine:
+- '1'
+- '2'
+- '3'
+- '4'
+- '5'
+- '6'
+cache_path: /data/rsg/nlp/hstark/ligbind/data/cacheNew
+chain_cutoff: 10
+ckpt: best_ema_inference_epoch_model.pt
+confidence_dropout: 0.1
+confidence_loss_weigth: 1
+confidence_no_batchnorm: false
+config: null
+correct_torsion_sigmas: true
+cross_distance_embed_dim: 32
+cross_max_distance: 80
+data_dir: /data/rsg/nlp/hstark/ligbind/data/PDBBind_processed/
+dataloader_drop_last: false
+dataset: pdbbind
+dedup_func: min
+different_schedules: false
+distance_embed_dim: 32
+dropout: 0.1
+dynamic_max_cross: true
+embedding_scale: 10000
+embedding_type: sinusoidal
+esm_embeddings_path: null
+high_confidence_threshold: 5.0
+include_confidence_prediction: false
+inf_sched_alpha: 1.0
+inf_sched_beta: 1.0
+inference_steps: 20
+limit_complexes: 0
+lm_embeddings_path: null
+log_dir: workdir
+lr: 0.0003
+main_metric: loss
+main_metric_goal: min
+matching_maxiter: 20
+matching_popsize: 20
+max_lig_size: null
+max_radius: 5.0
+model_save_frequency: 0
+multiplicity: 1
+n_epochs: 100
+no_aminoacid_identities: true
+no_batch_norm: false
+no_torsion: false
+norm_by_sigma: false
+normalize_affinity: false
+not_fixed_knn_radius_graph: false
+not_full_dataset: false
+not_knn_only_graph: false
+ns: 24
+num_conformers: 1
+num_conv_layers: 5
+num_workers: 1
+nv: 6
+odd_parity: false
+original_model_dir: /data/rsg/nlp/hstark/ligbind/workdir/restart_big_noAminoId
+parallel: 1
+parallel_aggregators: mean max min std
+project: ligbind_filtering
+protein_file: protein_processed
+rank_affinity: false
+rank_cutoff: null
+receptor_radius: 15.0
+remove_hs: true
+restart_dir: null
+rmsd_classification_cutoff: 2
+rmsd_prediction: false
+rot_alpha: 1
+rot_beta: 1
+rot_inf_sched_alpha: 1
+rot_inf_sched_beta: 1
+rot_sigma_schedule: expbeta
+rot_sigmoid_schedule: false
+run_name: noAminoId_confidence_l5s24v6_FILTERFROM_restart_big_noAminoId
+samples_per_complex: 3
+sampling_alpha: 1
+sampling_beta: 1
+scale_by_sigma: true
+schedule_k: 10
+schedule_m: 0.4
+scheduler: plateau
+scheduler_patience: 20
+separate_noise_schedule: false
+sigma_embed_dim: 32
+sigma_schedule: expbeta
+smooth_edges: false
+split_test: data/splits/timesplit_test
+split_train: data/splits/timesplit_no_lig_overlap_train
+split_val: data/splits/timesplit_no_lig_overlap_val
+temp_psi: 0.0
+temp_sampling: 1.0
+temp_sigma_data: 0.5
+tor_alpha: 1
+tor_beta: 1
+tor_inf_sched_alpha: 1
+tor_inf_sched_beta: 1
+tor_sigma_schedule: expbeta
+tp_attention: false
+tr_only_confidence: true
+train_sampling: linear
+transfer_weights: false
+use_original_model_cache: false
+use_second_order_repr: false
+w_decay: 0.0
+wandb: true
--- a/workdir/masked_score_model/best_ema_inference_epoch_model.pt
+++ b/workdir/masked_score_model/best_ema_inference_epoch_model.pt
--- a/workdir/masked_score_model/model_parameters.yml
+++ b/workdir/masked_score_model/model_parameters.yml
@@ -0,0 +1,95 @@
+all_atoms: false
+asyncronous_noise_schedule: false
+atom_max_neighbors: 8
+atom_radius: 5
+batch_size: 16
+c_alpha_max_neighbors: 24
+cache_path: /data/rsg/nlp/hstark/ligbind/data/cacheNew
+chain_cutoff: 10
+confidence_dropout: 0.0
+confidence_no_batchnorm: false
+confidence_weight: 0.33
+config: null
+cross_distance_embed_dim: 64
+cross_max_distance: 80
+cudnn_benchmark: true
+data_dir: data/PDBBind_processed/
+dataloader_drop_last: false
+distance_embed_dim: 64
+dropout: 0.1
+dynamic_max_cross: true
+ema_rate: 0.999
+embedding_scale: 1000
+embedding_type: sinusoidal
+esm_embeddings_path: null
+high_confidence_threshold: 5.0
+include_confidence_prediction: false
+inf_pocket_cutoff: 5
+inf_pocket_knowledge: false
+inference_earlystop_goal: max
+inference_earlystop_metric: valinf_rmsds_lt2
+inference_steps: 20
+limit_complexes: 0
+log_dir: /data/rsg/nlp/hstark/ligbind/workdir
+lr: 0.001
+matching_maxiter: 20
+matching_popsize: 20
+max_lig_size: null
+max_radius: 5.0
+n_epochs: 1000
+no_aminoacid_identities: true
+no_batch_norm: false
+no_torsion: false
+norm_by_sigma: false
+not_fixed_center_conv: false
+not_full_dataset: false
+ns: 48
+num_conformers: 1
+num_conv_layers: 6
+num_dataloader_workers: 1
+num_inference_complexes: 500
+num_workers: 1
+nv: 10
+odd_parity: false
+pin_memory: true
+pocket_mode_graph: false
+project: ligbind_train
+protein_file: protein_processed
+receptor_radius: 15.0
+remove_hs: true
+restart_dir: /data/rsg/nlp/hstark/ligbind/workdir/big_noAminoId
+restart_lr: null
+rot_alpha: 1.0
+rot_beta: 1.0
+rot_sigma_max: 1.55
+rot_sigma_min: 0.03
+rot_weight: 0.33
+run_name: restart_big_noAminoId
+sampling_alpha: 2.0
+sampling_beta: 1.0
+scale_by_sigma: true
+scheduler: plateau
+scheduler_patience: 30
+separate_noise_schedule: false
+sigma_embed_dim: 64
+smooth_edges: false
+split_test: data/splits/timesplit_test
+split_train: data/splits/timesplit_no_lig_overlap_train
+split_val: data/splits/timesplit_no_lig_overlap_val
+test_sigma_intervals: true
+tor_alpha: 1.0
+tor_beta: 1.0
+tor_sigma_max: 3.14
+tor_sigma_min: 0.03
+tor_weight: 0.33
+tr_only_confidence: true
+tr_sigma_max: 19.0
+tr_sigma_min: 0.1
+tr_weight: 0.33
+train_inference_freq: null
+use_ema: true
+use_full_size_protein_file: false
+use_second_order_repr: false
+val_inference_freq: 5
+w_decay: 0.0
+wandb: true