From 7dccdb215f48ca5b4eae5d28d09401b42c3cfb0b Mon Sep 17 00:00:00 2001 From: Kevin Wu Date: Fri, 9 Sep 2022 10:46:37 -0700 Subject: [PATCH] Remove dependencies on biopython --- bin/sample.py | 5 +- protdiff/angles_and_coords.py | 91 ----------------------------------- protdiff/datasets.py | 1 - 3 files changed, 4 insertions(+), 93 deletions(-) diff --git a/bin/sample.py b/bin/sample.py index 7dec211..f7d65b9 100644 --- a/bin/sample.py +++ b/bin/sample.py @@ -246,7 +246,9 @@ def main(): pd.DataFrame(s, columns=train_dset.feature_names["angles"]) for s in final_sampled ] - pdb_files = write_preds_pdb_folder(sampled_dfs, all_ft_train_dset, outdir / "sampled_pdb") + pdb_files = write_preds_pdb_folder( + sampled_dfs, all_ft_train_dset, outdir / "sampled_pdb" + ) logging.info(f"Done writing main outputs! Calculating tm scores...") all_tm_scores = {} @@ -257,6 +259,7 @@ def main(): with open(outdir / "tm_scores.json", "w") as sink: json.dump(all_tm_scores, sink, indent=4) + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) main() diff --git a/protdiff/angles_and_coords.py b/protdiff/angles_and_coords.py index 9394752..ed31a4c 100644 --- a/protdiff/angles_and_coords.py +++ b/protdiff/angles_and_coords.py @@ -1,8 +1,5 @@ """ Code to convert from angles between residues to XYZ coordinates. - -Based on: -https://github.com/biopython/biopython/blob/master/Bio/PDB/ic_rebuild.py """ import os import logging @@ -13,16 +10,10 @@ import warnings import numpy as np import pandas as pd -from Bio import PDB -from Bio.PDB import PICIO, ic_rebuild from sequence_models import pdb_utils import biotite.structure as struc from biotite.structure.io.pdb import PDBFile -from biotite.structure.io.pdbx import PDBxFile - -import torch -from torch.utils.data import Dataset import nerf @@ -33,47 +24,6 @@ MINIMAL_ANGLES = ["phi", "psi", "omega"] MINIMAL_DISTS = [] -def pdb_to_pic(pdb_file: str, pic_file: str): - """ - Convert the PDB file to a PIC file - """ - parser = PDB.PDBParser(QUIET=True) - s = parser.get_structure("pdb", pdb_file) - chains = [c for c in s.get_chains()] - if len(chains) > 1: - raise NotImplementedError - chain = chains.pop() # type Bio.PDB.Chain.Chain - # print(chain.__dict__.keys()) - - # Convert to relative angles - # Calculate dihedrals, angles, bond lengths (internal coordinates) for Atom data - # Generates atomArray through init_edra - chain.atom_to_internal_coordinates() - - for res in chain.internal_coord.ordered_aa_ic_list: - # Look at only analines because that's what we generate - if res.residue.get_resname() != "ALA": - continue - # print("REF", res, type(res)) - # print(res.dihedra.keys()) - - with open(pic_file, "w") as sink: - PICIO.write_PIC(chain, sink) - - -def pic_to_pdb(pic_file: str, pdb_file: str): - """ - Read int he PIC file and convert to a PDB file - """ - with open(pic_file) as source: - f = PICIO.read_PIC(source) - f.internal_to_atom_coordinates() - - io = PDB.PDBIO() - io.set_structure(f) - io.save(pdb_file) - - def coords_to_trrosetta_angles( coords: Union[np.ndarray, Dict[str, List[List[float]]]], ) -> Optional[np.ndarray]: @@ -258,47 +208,6 @@ def canonical_distances_and_dihedrals( return pd.DataFrame({k: calc_angles[k].squeeze() for k in distances + angles}) -def sample_coords( - fname: str, - subset_residues: Optional[Collection[str]] = None, - query_atoms: List[str] = ["N", "CA", "C", "O", "CB"], -) -> List[pd.DataFrame]: - """ - Sample the atomic coordinates of Alanine atoms. Return a list of dataframes each containing these - coordinates. - - We use this to help figure out where to initialize atoms when creating a new chain - """ - atomic_coords = [] - - parser = PDB.PDBParser(QUIET=True) - s = parser.get_structure("", fname) - for chain in s.get_chains(): - residues = [ - r for r in chain.get_residues() if r.get_resname() not in ("HOH", "NA") - ] - - for res in residues: - if subset_residues is not None and res.get_resname() not in subset_residues: - continue - coords = {} - for atom in res.get_atoms(): - coords[atom.get_name()] = atom.get_coord() - all_atoms_present = True - - for atom in query_atoms: - if atom not in coords: - logging.debug(f"{atom} not found in {res.get_resname()}") - all_atoms_present = False - break - - if all_atoms_present: - atomic_coords.append( - pd.DataFrame([coords[k] for k in query_atoms], index=query_atoms) - ) - return atomic_coords - - def create_new_chain_nerf( out_fname: str, dists_and_angles: pd.DataFrame, diff --git a/protdiff/datasets.py b/protdiff/datasets.py index 13b8b83..01bf28a 100644 --- a/protdiff/datasets.py +++ b/protdiff/datasets.py @@ -296,7 +296,6 @@ class CathCanonicalAnglesDataset(Dataset): f"Computing full dataset of {len(fnames)} with {multiprocessing.cpu_count()} threads" ) # Generate dihedral angles - # https://biopython.org/docs/1.76/api/Bio.PDB.PDBParser.html pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()) struct_arrays = pool.map(pfunc, fnames, chunksize=250) pool.close()