From a6c386235328ae5f4552d0a88495c6f58ccd18fb Mon Sep 17 00:00:00 2001 From: Kevin Wu Date: Tue, 12 Dec 2023 09:51:51 -0800 Subject: [PATCH] Unused script cleanup --- scripts/gromacs/Dockerfile | 15 -- scripts/gromacs/gromacs.py | 206 ------------------ scripts/gromacs/gromacs_docker.py | 73 ------- scripts/gromacs/mdp/ions.mdp | 15 -- scripts/gromacs/mdp/md.mdp | 46 ---- scripts/gromacs/mdp/minim.mdp | 15 -- scripts/gromacs/mdp/npt.mdp | 44 ---- scripts/gromacs/mdp/nvt.mdp | 41 ---- scripts/gromacs/mdp/prod.mdp | 41 ---- scripts/slurm/alphafold.sbatch | 21 -- scripts/slurm/omegafold_on_proteinmpnn.sbatch | 17 -- scripts/slurm/train_ar_baseline.sbatch | 17 -- .../slurm/train_cosine_discard_long.sbatch | 17 -- scripts/slurm/train_discard_long.sbatch | 17 -- 14 files changed, 585 deletions(-) delete mode 100644 scripts/gromacs/Dockerfile delete mode 100644 scripts/gromacs/gromacs.py delete mode 100644 scripts/gromacs/gromacs_docker.py delete mode 100644 scripts/gromacs/mdp/ions.mdp delete mode 100644 scripts/gromacs/mdp/md.mdp delete mode 100644 scripts/gromacs/mdp/minim.mdp delete mode 100644 scripts/gromacs/mdp/npt.mdp delete mode 100644 scripts/gromacs/mdp/nvt.mdp delete mode 100644 scripts/gromacs/mdp/prod.mdp delete mode 100644 scripts/slurm/alphafold.sbatch delete mode 100644 scripts/slurm/omegafold_on_proteinmpnn.sbatch delete mode 100644 scripts/slurm/train_ar_baseline.sbatch delete mode 100644 scripts/slurm/train_cosine_discard_long.sbatch delete mode 100644 scripts/slurm/train_discard_long.sbatch diff --git a/scripts/gromacs/Dockerfile b/scripts/gromacs/Dockerfile deleted file mode 100644 index 2e21465..0000000 --- a/scripts/gromacs/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -# Useful links: -# https://github.com/docker/buildx/issues/476 -# Sometimes need to run sudo service docker restart to build correctly -# https://stackoverflow.com/questions/50309605/reading-input-files-with-docker - -# Example command: -# nvidia-docker run -it --rm -v ${PWD}:/host_pwd --workdir /host_pwd wukevin:gromacs-latest generated_0_proteinmpnn_residues_0.pdb - -FROM nvcr.io/hpc/gromacs:2022.3 - -# Should already come with python3; just copy in files -COPY gromacs.py /usr/local/bin/gromacs.py -COPY mdp /usr/local/bin/mdp - -ENTRYPOINT [ "python3", "/usr/local/bin/gromacs.py", "--gmxbin", "/usr/local/gromacs/avx2_256/bin/gmx", "--mdp", "/usr/local/bin/mdp/"] \ No newline at end of file diff --git a/scripts/gromacs/gromacs.py b/scripts/gromacs/gromacs.py deleted file mode 100644 index 3649e14..0000000 --- a/scripts/gromacs/gromacs.py +++ /dev/null @@ -1,206 +0,0 @@ -""" -Script to run GROMACS on an input file - -Take the 20 amino acids, build a skeleton of each side chain -Map the coordinates into the glycine - -Don't even need a great "starting structure" - can be in the general proximity -GROMACS can do no hydrogen -pdb2gmx ignore h to add them in -""" - -import os -import sys -import socket -import argparse -import tempfile -import logging -import shlex -import subprocess -import shutil - -GRO_FILE_DIR = os.path.join(os.path.dirname(__file__), "mdp") -# Up the prod.mdp nsteps to 50000000 to run a longer simulation -# 6nm x 6nm x 6nm (or 7) box (currently gives you 1nm padding around the protein) -# gmx insert-molecules -f -ci -box -nmol <# of waters> - -# Run the biggest thing, use box volume and the same number of waters for other things -# Run a bunch of 128 residues and see how many water get added - -def run_gromacs( - pdb_file: str, - outdir: str = os.getcwd(), - gmx: str = "gmx", - gro_file_dir: str = GRO_FILE_DIR, - n_threads: int = 8, -) -> float: - """ - Run GROMACS on a PDB file - """ - logging.info(f"Running gromacs in outdir {outdir}") - assert os.path.isfile(pdb_file), f"File {pdb_file} not found! (pwd: {os.getcwd()})" - gro_file = os.path.join(outdir, os.path.basename(pdb_file).replace(".pdb", ".gro")) - # pdb2gmx = f"gmx pdb2gmx -f {pdb_file} -o {gro_file} -ff 6 -water tip3p" - # Puts it in a GMX format, add water and force field - # AMBER/CHARM most common for protein and protein folding. - # A force field defines all forces/energies interacting on - # a given atom. - pdb2gmx = f"{gmx} pdb2gmx -f {pdb_file} -o {gro_file} -water tip3p" - logging.debug(f"pdb2gmx cmd: {pdb2gmx}") - p = subprocess.Popen(shlex.split(pdb2gmx), stdin=subprocess.PIPE) - p.communicate(input="6".encode()) - - # gen box - put this in a water solvent 'in a box' - 1nm around the system - # box_file = os.path.join(outdir, "box.gro") - # box_cmd = f"{gmx} editconf -f {gro_file} -o {box_file} -c -d 1" - # subprocess.call(shlex.split(box_cmd)) - - # solvate - add water - # solvate_cmd = ( - # f"{gmx} solvate -cp {box_file} -o solv.gro -cs spc216.gro -p topol.top" - # ) - # logging.debug(f"solvate cmd: {solvate_cmd}") - # subprocess.call(shlex.split(solvate_cmd)) - - # Rather than automatically adding a box and solvating, add an explicit constnat number of - # water atoms. This is becasue the energy of the system is dependent on the number of atoms, - # so we hold this constant. - sol_add = 'echo "SOL 12000" >> topol.top' - subprocess.call(sol_add, shell=True) - - addwater_cmd = f"{gmx} insert-molecules -f {gro_file} -ci {gro_file_dir}water2.gro -box 12 12 12 -nmol 12000 -o solv.gro" - subprocess.call(shlex.split(addwater_cmd)) - - - # add ions - add counter postive and negative ions to make - # the box "neutral" - ions_cmd = ( - f"{gmx} grompp -f {gro_file_dir}ions.mdp -c solv.gro -o ions.tpr -p topol.top" - ) - logging.debug(f"ions cmd: {ions_cmd}") - subprocess.call(shlex.split(ions_cmd)) - - genion_cmd = f"{gmx} genion -s ions.tpr -o ions.gro -p topol.top -pname NA -nname CL -neutral" - logging.debug(f"genion cmd: {genion_cmd}") - p = subprocess.Popen(shlex.split(genion_cmd), stdin=subprocess.PIPE) - p.communicate(input="13".encode()) - - # Energy minimization - remove unfavorable contacts - # like making sure nothing is overlapping; nothing should - # change too much - em_cmd = ( - f"{gmx} grompp -f {gro_file_dir}minim.mdp -c ions.gro -o em.tpr -p topol.top" - ) - logging.debug(f"EM cmd: {em_cmd}") - subprocess.call(shlex.split(em_cmd)) - - mdrun_cmd = f"{gmx} mdrun -ntmpi 1 -ntomp {n_threads-1} -deffnm em" - logging.debug(f"mdrun cmd: {mdrun_cmd}") - subprocess.call(shlex.split(mdrun_cmd)) - - # NVT - equilibrate the system at constant volume and temperature - # come to "room temperature" - grompp_cmd = f"{gmx} grompp -f {gro_file_dir}nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr" - subprocess.call(shlex.split(grompp_cmd)) - nvt_cmd = f"{gmx} mdrun -ntmpi 1 -ntomp {n_threads - 1} -nb gpu -pin on -deffnm nvt" - subprocess.call(shlex.split(nvt_cmd)) - - # NPT - grompp_cmd = ( - f"{gmx} grompp -f {gro_file_dir}npt.mdp -c nvt.gro -o npt.tpr -p topol.top" - ) - subprocess.call(shlex.split(grompp_cmd)) - npt_cmd = f"{gmx} mdrun -ntmpi 1 -ntomp {n_threads - 1} -nb gpu -pin on -deffnm npt" - subprocess.call(shlex.split(npt_cmd)) - - # Production run - grompp_cmd = f"{gmx} grompp -f {gro_file_dir}md.mdp -c npt.gro -t npt.cpt -p topol.top -o prod.tpr" - subprocess.call(shlex.split(grompp_cmd)) - prod_cmd = ( - f"{gmx} mdrun -ntmpi 1 -ntomp {n_threads - 1} -nb gpu -pin on -deffnm prod" - ) - subprocess.call(shlex.split(prod_cmd)) - - # Produce a PDB of final structure - pdb_cmd = f"{gmx} editconf -f prod.gro -o prod.pdb" - subprocess.call(shlex.split(pdb_cmd)) - - # Read energy and return - return read_energy("prod.edr", gmx=gmx) - - -def read_energy( - energy_edr_file: str, - gmx: str = "gmx", -) -> float: - """ - Read energy from GROMACS energy file - """ - assert os.path.isfile(energy_edr_file), f"File {energy_edr_file} not found" - cmd = f"{gmx} energy -f {energy_edr_file} -o energy.xvg" - p = subprocess.Popen( - shlex.split(cmd), stdin=subprocess.PIPE, stdout=subprocess.PIPE - ) - stdout = p.communicate(input="11\n\n".encode())[0].decode().split("\n") - potential_lines = [l for l in stdout if l.startswith("Potential")] - assert len(potential_lines) == 1, "Unexpected number of potential lines" - energy = float(potential_lines[0].split()[1]) - return energy - - -def build_parser(): - """Build basic CLI parser""" - parser = argparse.ArgumentParser( - usage=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("pdb_file", help="PDB file to run GROMACS on") - parser.add_argument( - "-o", - "--outdir", - type=str, - default=os.getcwd(), - help="Directory to write output", - ) - parser.add_argument("--copyall", action="store_true", help="Copy all GROMACS files") - parser.add_argument( - "--gmxbin", type=str, default=shutil.which("gmx"), help="GROMACS binary" - ) - parser.add_argument( - "--mdp", type=str, default=GRO_FILE_DIR, help="MDP file directory" - ) - parser.add_argument("--threads", type=int, default=32, help="Threads (minimum 2)") - return parser - - -def main(): - """Run script""" - args = build_parser().parse_args() - logging.info( - f"Running {args.pdb_file} under Python {sys.version} in {socket.gethostname()}" - ) - assert os.path.isdir(args.outdir), f"Directory {args.outdir} not found" - assert args.gmxbin is not None - args.pdb_file = os.path.abspath(args.pdb_file) - # Run in temporary directory - with tempfile.TemporaryDirectory() as tmpdir: - os.chdir(tmpdir) - energy = run_gromacs( - args.pdb_file, - tmpdir, - gmx=args.gmxbin, - gro_file_dir=args.mdp, - n_threads=args.threads, - ) - for file in os.listdir(tmpdir): - logging.debug(f"GROMACS file: {file}") - if args.copyall: - shutil.copy(os.path.join(tmpdir, file), args.outdir) - elif file.startswith("prod"): - logging.info(f"Copy {file} to {args.outdir}") - shutil.copy(os.path.join(tmpdir, file), args.outdir) - logging.info(f"{args.pdb_file} energy: {energy:.2f}") - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - main() diff --git a/scripts/gromacs/gromacs_docker.py b/scripts/gromacs/gromacs_docker.py deleted file mode 100644 index decb134..0000000 --- a/scripts/gromacs/gromacs_docker.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Simple script to wrap the gromacs.py file in an easy to run docker container -to avoid all the messiness of trying to do mounting and stuff. - -Usage: python gromacs_docker.py -""" -import os -import logging -import shutil -import tempfile -import subprocess -import argparse - - -def build_parser(): - """Build a basic CLI parser""" - parser = argparse.ArgumentParser() - parser.add_argument("input_file", nargs="+", help="Input file to run GROMACS on") - parser.add_argument("output_dir", help="Output dir to write output files to") - parser.add_argument("--gpu", type=int, default=0, help="GPU to use") - return parser - - -def run_gromacs_in_docker(fname: str, out_dir: str, gpu: int = 0): - """ - Run gromacs in docker - """ - assert os.path.isfile(fname), f"Input file {fname} not found" - assert shutil.which("nvidia-docker") - out_dir = os.path.abspath(out_dir) - fname = os.path.abspath(fname) - bname = os.path.splitext(os.path.basename(fname))[0] - - orig_dir = os.getcwd() - with tempfile.TemporaryDirectory() as tmpdir: - logging.info(f"Running {fname} via docker in temporary directory {tmpdir}") - assert not os.listdir(tmpdir) - os.chdir(tmpdir) - # Copy the file into the directory - shutil.copy(fname, tmpdir) - # Build and run the command - # https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#i-have-multiple-gpu-devices-how-can-i-isolate-them-between-my-containers - cmd = f"nvidia-docker run -it --rm -e NVIDIA_VISIBLE_DEVICES={gpu} -v {tmpdir}:/host_pwd --workdir /host_pwd wukevin:gromacs-latest {os.path.basename(fname)}" - logging.info(f"Running command: {cmd}") - with open(os.path.join(out_dir, f"{bname}.gromacs.stdout"), "wb") as stdout: - with open(os.path.join(out_dir, f"{bname}.gromacs.stderr"), "wb") as stderr: - subprocess.call(cmd, shell=True, stdout=stdout, stderr=stderr) - - for src_fname in os.listdir(tmpdir): - dest_fname = ( - src_fname - if src_fname.startswith(bname) - else ".".join([bname, src_fname]) - ) - logging.info(f"Copying {src_fname} to {dest_fname} in {out_dir}") - shutil.copy( - os.path.join(tmpdir, src_fname), os.path.join(out_dir, dest_fname) - ) - os.chdir(orig_dir) # Restore directory - - -def main(): - """Run script""" - args = build_parser().parse_args() - if not os.path.isdir(args.output_dir): - os.makedirs(args.output_dir) - for fname in [os.path.abspath(f) for f in args.input_file]: - run_gromacs_in_docker(fname, args.output_dir, gpu=args.gpu) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - main() diff --git a/scripts/gromacs/mdp/ions.mdp b/scripts/gromacs/mdp/ions.mdp deleted file mode 100644 index eb2cfc5..0000000 --- a/scripts/gromacs/mdp/ions.mdp +++ /dev/null @@ -1,15 +0,0 @@ -; ions.mdp - used as input into grompp to generate ions.tpr -; Parameters describing what to do, when to stop and what to save -integrator = steep ; Algorithm (steep = steepest descent minimization) -emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm -emstep = 0.01 ; Minimization step size -nsteps = 50000 ; Maximum number of (minimization) steps to perform - -; Parameters describing how to find the neighbors of each atom and how to calculate the interactions -nstlist = 1 ; Frequency to update the neighbor list and long range forces -cutoff-scheme = Verlet ; Buffered neighbor searching -ns_type = grid ; Method to determine neighbor list (simple, grid) -coulombtype = cutoff ; Treatment of long range electrostatic interactions -rcoulomb = 1.0 ; Short-range electrostatic cut-off -rvdw = 1.0 ; Short-range Van der Waals cut-off -pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions diff --git a/scripts/gromacs/mdp/md.mdp b/scripts/gromacs/mdp/md.mdp deleted file mode 100644 index 75080d1..0000000 --- a/scripts/gromacs/mdp/md.mdp +++ /dev/null @@ -1,46 +0,0 @@ -title = OPLS Lysozyme NPT equilibration -; Run parameters -integrator = md ; leap-frog integrator -nsteps = 500000 ; 2 * 500000 = 1000 ps (1 ns) -dt = 0.002 ; 2 fs -; Output control -nstxout = 0 ; suppress bulky .trr file by specifying -nstvout = 0 ; 0 for output frequency of nstxout, -nstfout = 0 ; nstvout, and nstfout -nstenergy = 5000 ; save energies every 10.0 ps -nstlog = 5000 ; update log file every 10.0 ps -nstxout-compressed = 5000 ; save compressed coordinates every 10.0 ps -compressed-x-grps = System ; save the whole system -; Bond parameters -continuation = yes ; Restarting after NPT -constraint_algorithm = lincs ; holonomic constraints -constraints = h-bonds ; bonds involving H are constrained -lincs_iter = 1 ; accuracy of LINCS -lincs_order = 4 ; also related to accuracy -; Neighborsearching -cutoff-scheme = Verlet ; Buffered neighbor searching -ns_type = grid ; search neighboring grid cells -nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme -rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) -rvdw = 1.0 ; short-range van der Waals cutoff (in nm) -; Electrostatics -coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics -pme_order = 4 ; cubic interpolation -fourierspacing = 0.16 ; grid spacing for FFT -; Temperature coupling is on -tcoupl = V-rescale ; modified Berendsen thermostat -tc-grps = Protein Non-Protein ; two coupling groups - more accurate -tau_t = 0.1 0.1 ; time constant, in ps -ref_t = 300 300 ; reference temperature, one for each group, in K -; Pressure coupling is on -pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT -pcoupltype = isotropic ; uniform scaling of box vectors -tau_p = 2.0 ; time constant, in ps -ref_p = 1.0 ; reference pressure, in bar -compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 -; Periodic boundary conditions -pbc = xyz ; 3-D PBC -; Dispersion correction -DispCorr = EnerPres ; account for cut-off vdW scheme -; Velocity generation -gen_vel = no ; Velocity generation is off diff --git a/scripts/gromacs/mdp/minim.mdp b/scripts/gromacs/mdp/minim.mdp deleted file mode 100644 index 1af4f96..0000000 --- a/scripts/gromacs/mdp/minim.mdp +++ /dev/null @@ -1,15 +0,0 @@ -; minim.mdp - used as input into grompp to generate em.tpr -; Parameters describing what to do, when to stop and what to save -integrator = steep ; Algorithm (steep = steepest descent minimization) -emtol = 1000.0 ; Stop minimization when the maximum force < 1000.0 kJ/mol/nm -emstep = 0.01 ; Minimization step size -nsteps = 50000 ; Maximum number of (minimization) steps to perform - -; Parameters describing how to find the neighbors of each atom and how to calculate the interactions -nstlist = 1 ; Frequency to update the neighbor list and long range forces -cutoff-scheme = Verlet ; Buffered neighbor searching -ns_type = grid ; Method to determine neighbor list (simple, grid) -coulombtype = PME ; Treatment of long range electrostatic interactions -rcoulomb = 1.0 ; Short-range electrostatic cut-off -rvdw = 1.0 ; Short-range Van der Waals cut-off -pbc = xyz ; Periodic Boundary Conditions in all 3 dimensions diff --git a/scripts/gromacs/mdp/npt.mdp b/scripts/gromacs/mdp/npt.mdp deleted file mode 100644 index eca4d72..0000000 --- a/scripts/gromacs/mdp/npt.mdp +++ /dev/null @@ -1,44 +0,0 @@ -title = OPLS Lysozyme NPT equilibration -; define = -DPOSRES ; position restrain the protein -; Run parameters -integrator = md ; leap-frog integrator -nsteps = 50000 ; 2 * 50000 = 100 ps -dt = 0.002 ; 2 fs -; Output control -nstxout = 500 ; save coordinates every 1.0 ps -nstvout = 500 ; save velocities every 1.0 ps -nstenergy = 500 ; save energies every 1.0 ps -nstlog = 500 ; update log file every 1.0 ps -; Bond parameters -continuation = yes ; Restarting after NVT -constraint_algorithm = lincs ; holonomic constraints -constraints = h-bonds ; bonds involving H are constrained -lincs_iter = 1 ; accuracy of LINCS -lincs_order = 4 ; also related to accuracy -; Nonbonded settings -cutoff-scheme = Verlet ; Buffered neighbor searching -ns_type = grid ; search neighboring grid cells -nstlist = 10 ; 20 fs, largely irrelevant with Verlet scheme -rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) -rvdw = 1.0 ; short-range van der Waals cutoff (in nm) -DispCorr = EnerPres ; account for cut-off vdW scheme -; Electrostatics -coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics -pme_order = 4 ; cubic interpolation -fourierspacing = 0.16 ; grid spacing for FFT -; Temperature coupling is on -tcoupl = V-rescale ; modified Berendsen thermostat -tc-grps = Protein Non-Protein ; two coupling groups - more accurate -tau_t = 0.1 0.1 ; time constant, in ps -ref_t = 300 300 ; reference temperature, one for each group, in K -; Pressure coupling is on -pcoupl = Parrinello-Rahman ; Pressure coupling on in NPT -pcoupltype = isotropic ; uniform scaling of box vectors -tau_p = 2.0 ; time constant, in ps -ref_p = 1.0 ; reference pressure, in bar -compressibility = 4.5e-5 ; isothermal compressibility of water, bar^-1 -refcoord_scaling = com -; Periodic boundary conditions -pbc = xyz ; 3-D PBC -; Velocity generation -gen_vel = no ; Velocity generation is off diff --git a/scripts/gromacs/mdp/nvt.mdp b/scripts/gromacs/mdp/nvt.mdp deleted file mode 100644 index 5aaec44..0000000 --- a/scripts/gromacs/mdp/nvt.mdp +++ /dev/null @@ -1,41 +0,0 @@ -title = OPLS Lysozyme NVT equilibration -;define = -DPOSRES ; position restrain the protein -; Run parameters -integrator = md ; leap-frog integrator -nsteps = 50000 ; 2 * 50000 = 100 ps -dt = 0.002 ; 2 fs -; Output control -nstxout = 500 ; save coordinates every 1.0 ps -nstvout = 500 ; save velocities every 1.0 ps -nstenergy = 500 ; save energies every 1.0 ps -nstlog = 500 ; update log file every 1.0 ps -; Bond parameters -continuation = no ; first dynamics run -constraint_algorithm = lincs ; holonomic constraints -constraints = h-bonds ; bonds involving H are constrained -lincs_iter = 1 ; accuracy of LINCS -lincs_order = 4 ; also related to accuracy -; Nonbonded settings -cutoff-scheme = Verlet ; Buffered neighbor searching -ns_type = grid ; search neighboring grid cells -nstlist = 10 ; 20 fs, largely irrelevant with Verlet -rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) -rvdw = 1.0 ; short-range van der Waals cutoff (in nm) -DispCorr = EnerPres ; account for cut-off vdW scheme -; Electrostatics -coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics -pme_order = 4 ; cubic interpolation -fourierspacing = 0.16 ; grid spacing for FFT -; Temperature coupling is on -tcoupl = V-rescale ; modified Berendsen thermostat -tc-grps = Protein Non-Protein ; two coupling groups - more accurate -tau_t = 0.1 0.1 ; time constant, in ps -ref_t = 300 300 ; reference temperature, one for each group, in K -; Pressure coupling is off -pcoupl = no ; no pressure coupling in NVT -; Periodic boundary conditions -pbc = xyz ; 3-D PBC -; Velocity generation -gen_vel = yes ; assign velocities from Maxwell distribution -gen_temp = 300 ; temperature for Maxwell distribution -gen_seed = -1 ; generate a random seed diff --git a/scripts/gromacs/mdp/prod.mdp b/scripts/gromacs/mdp/prod.mdp deleted file mode 100644 index 0f6ea32..0000000 --- a/scripts/gromacs/mdp/prod.mdp +++ /dev/null @@ -1,41 +0,0 @@ -title = OPLS Lysozyme NVT equilibration -;define = -DPOSRES ; position restrain the protein -; Run parameters -integrator = md ; leap-frog integrator -nsteps = 2500000 ; 2fs * 125000000 steps = 250 ns -dt = 0.002 ; 2 fs -; Output control -nstxout = 10000 ; save coordinates every 1.0 ps -nstvout = 10000 ; save velocities every 1.0 ps -nstenergy = 10000 ; save energies every 1.0 ps -nstlog = 10000 ; update log file every 1.0 ps -; Bond parameters -continuation = yes ; first dynamics run -constraint_algorithm = lincs ; holonomic constraints -constraints = h-bonds ; bonds involving H are constrained -lincs_iter = 1 ; accuracy of LINCS -lincs_order = 4 ; also related to accuracy -; Nonbonded settings -cutoff-scheme = Verlet ; Buffered neighbor searching -ns_type = grid ; search neighboring grid cells -nstlist = 10 ; 20 fs, largely irrelevant with Verlet -rcoulomb = 1.0 ; short-range electrostatic cutoff (in nm) -rvdw = 1.0 ; short-range van der Waals cutoff (in nm) -DispCorr = EnerPres ; account for cut-off vdW scheme -; Electrostatics -coulombtype = PME ; Particle Mesh Ewald for long-range electrostatics -pme_order = 4 ; cubic interpolation -fourierspacing = 0.16 ; grid spacing for FFT -; Temperature coupling is on -tcoupl = V-rescale ; modified Berendsen thermostat -tc-grps = Protein Non-Protein ; two coupling groups - more accurate -tau_t = 0.1 0.1 ; time constant, in ps -ref_t = 300.00 300.00 ; reference temperature, one for each group, in K -; Pressure coupling is off -pcoupl = no ; no pressure coupling in NVT -; Periodic boundary conditions -pbc = xyz ; 3-D PBC -; Velocity generation -;gen_vel = yes ; assign velocities from Maxwell distribution -;gen_temp = 300.00 ; temperature for Maxwell distribution -;gen_seed = -1 ; generate a random seed diff --git a/scripts/slurm/alphafold.sbatch b/scripts/slurm/alphafold.sbatch deleted file mode 100644 index bc9c21d..0000000 --- a/scripts/slurm/alphafold.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=jamesz -#SBATCH --job-name=alphafold # Job name -#SBATCH --mail-type=ALL # Mail events (NONE, BEGIN, END, FAIL, ALL) -#SBATCH --mail-user=wukevin@stanford.edu # Where to send mail -#SBATCH --nodes=1 # Run all processes on a single node -#SBATCH --ntasks=4 # Number of processes -#SBATCH --mem=16gb # Job memory request -#SBATCH -G 1 -#SBATCH -C GPU_SKU:RTX_2080Ti -#SBATCH --time=6-23:59:59 # Time limit -#SBATCH --output=alphafold_%j.log # Standard output and error log - -ml gcc/6.3.0 -ml cuda/11.7.1 -. /home/groups/jamesz/miniconda3/etc/profile.d/conda.sh -. ~/.bashrc - -# Expects a3m files under "input" directory, outputs to "out" direcotry -# Auto handles environment -/scratch/users/wukevin/software/localcolabfold/colabfold_batch/bin/colabfold_batch inputs out --num-recycle 15 --num-models 1 --model-order 1 --sort-queries-by random diff --git a/scripts/slurm/omegafold_on_proteinmpnn.sbatch b/scripts/slurm/omegafold_on_proteinmpnn.sbatch deleted file mode 100644 index 7ed4250..0000000 --- a/scripts/slurm/omegafold_on_proteinmpnn.sbatch +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -#SBATCH --partition=jamesz -#SBATCH --job-name=omegafold # Job name -#SBATCH --mail-type=ALL # Mail events (NONE, BEGIN, END, FAIL, ALL) -#SBATCH --mail-user=wukevin@stanford.edu # Where to send mail -#SBATCH --nodes=1 # Run all processes on a single node -#SBATCH --ntasks=10 # Number of processes -#SBATCH --mem=40gb # Job memory request -#SBATCH -G 2 -#SBATCH -C GPU_SKU:RTX_2080Ti -#SBATCH --time=6-23:59:59 # Time limit -#SBATCH --output=omegafold_%j.log # Standard output and error log -. /home/groups/jamesz/miniconda3/etc/profile.d/conda.sh -. ~/.bashrc -# Activate the appropriate conda environment -conda activate /home/groups/jamesz/wukevin/envs/omegafold -python /home/groups/jamesz/wukevin/projects/protdiff/bin/omegafold_across_gpus.py proteinmpnn_residues/*.fasta -o omegafold_predictions_proteinmpnn --weights /home/groups/jamesz/wukevin/software/omegafold/release1.pt diff --git a/scripts/slurm/train_ar_baseline.sbatch b/scripts/slurm/train_ar_baseline.sbatch deleted file mode 100644 index 556e5b2..0000000 --- a/scripts/slurm/train_ar_baseline.sbatch +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -#SBATCH --partition=jamesz -#SBATCH --job-name=training # Job name -#SBATCH --mail-type=FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL) -#SBATCH --mail-user=wukevin@stanford.edu # Where to send mail -#SBATCH --nodes=1 # Run all processes on a single node -#SBATCH --ntasks=20 # Number of processes -#SBATCH --mem=40gb # Job memory request -#SBATCH -G 2 -#SBATCH -C GPU_SKU:RTX_2080Ti -#SBATCH --time=6-23:59:59 # Time limit -#SBATCH --output=training_%j.log # Standard output and error log -. /home/groups/jamesz/miniconda3/etc/profile.d/conda.sh -. ~/.bashrc -# Activate the appropriate conda environment -conda activate /home/groups/jamesz/wukevin/envs/protdiff -python /home/groups/jamesz/wukevin/projects/protdiff/bin/train_autoregressive.py diff --git a/scripts/slurm/train_cosine_discard_long.sbatch b/scripts/slurm/train_cosine_discard_long.sbatch deleted file mode 100644 index 4406eba..0000000 --- a/scripts/slurm/train_cosine_discard_long.sbatch +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -#SBATCH --partition=jamesz -#SBATCH --job-name=training # Job name -#SBATCH --mail-type=FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL) -#SBATCH --mail-user=wukevin@stanford.edu # Where to send mail -#SBATCH --nodes=1 # Run all processes on a single node -#SBATCH --ntasks=20 # Number of processes -#SBATCH --mem=40gb # Job memory request -#SBATCH -G 2 -#SBATCH -C GPU_SKU:RTX_2080Ti -#SBATCH --time=6-23:59:59 # Time limit -#SBATCH --output=training_%j.log # Standard output and error log -. /home/groups/jamesz/miniconda3/etc/profile.d/conda.sh -. ~/.bashrc -# Activate the appropriate conda environment -conda activate /home/groups/jamesz/wukevin/envs/protdiff -python /home/groups/jamesz/wukevin/projects/protdiff/bin/train.py /home/groups/jamesz/wukevin/projects/protdiff/config_jsons/cath_full_angles_cosine_discard_long.json --dryrun diff --git a/scripts/slurm/train_discard_long.sbatch b/scripts/slurm/train_discard_long.sbatch deleted file mode 100644 index eb80cf4..0000000 --- a/scripts/slurm/train_discard_long.sbatch +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -#SBATCH --partition=jamesz -#SBATCH --job-name=training # Job name -#SBATCH --mail-type=FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL) -#SBATCH --mail-user=wukevin@stanford.edu # Where to send mail -#SBATCH --nodes=1 # Run all processes on a single node -#SBATCH --ntasks=20 # Number of processes -#SBATCH --mem=40gb # Job memory request -#SBATCH -G 2 -#SBATCH -C GPU_SKU:RTX_2080Ti -#SBATCH --time=6-23:59:59 # Time limit -#SBATCH --output=training_%j.log # Standard output and error log -. /home/groups/jamesz/miniconda3/etc/profile.d/conda.sh -. ~/.bashrc -# Activate the appropriate conda environment -conda activate /home/groups/jamesz/wukevin/envs/protdiff -python /home/groups/jamesz/wukevin/projects/protdiff/bin/train.py /home/groups/jamesz/wukevin/projects/protdiff/config_jsons/cath_full_angles_discard_long.json --dryrun