mirror of
https://github.com/microsoft/foldingdiff.git
synced 2026-06-04 13:30:33 +08:00
Remove dependencies on biopython
This commit is contained in:
@@ -246,7 +246,9 @@ def main():
|
||||
pd.DataFrame(s, columns=train_dset.feature_names["angles"])
|
||||
for s in final_sampled
|
||||
]
|
||||
pdb_files = write_preds_pdb_folder(sampled_dfs, all_ft_train_dset, outdir / "sampled_pdb")
|
||||
pdb_files = write_preds_pdb_folder(
|
||||
sampled_dfs, all_ft_train_dset, outdir / "sampled_pdb"
|
||||
)
|
||||
|
||||
logging.info(f"Done writing main outputs! Calculating tm scores...")
|
||||
all_tm_scores = {}
|
||||
@@ -257,6 +259,7 @@ def main():
|
||||
with open(outdir / "tm_scores.json", "w") as sink:
|
||||
json.dump(all_tm_scores, sink, indent=4)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
main()
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
"""
|
||||
Code to convert from angles between residues to XYZ coordinates.
|
||||
|
||||
Based on:
|
||||
https://github.com/biopython/biopython/blob/master/Bio/PDB/ic_rebuild.py
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
@@ -13,16 +10,10 @@ import warnings
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from Bio import PDB
|
||||
from Bio.PDB import PICIO, ic_rebuild
|
||||
from sequence_models import pdb_utils
|
||||
|
||||
import biotite.structure as struc
|
||||
from biotite.structure.io.pdb import PDBFile
|
||||
from biotite.structure.io.pdbx import PDBxFile
|
||||
|
||||
import torch
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
import nerf
|
||||
|
||||
@@ -33,47 +24,6 @@ MINIMAL_ANGLES = ["phi", "psi", "omega"]
|
||||
MINIMAL_DISTS = []
|
||||
|
||||
|
||||
def pdb_to_pic(pdb_file: str, pic_file: str):
    """
    Convert a single-chain PDB file to a PIC (internal coordinates) file.

    The structure in ``pdb_file`` is parsed with ``PDB.PDBParser``, internal
    coordinates are computed for its only chain, and the chain is written to
    ``pic_file`` with ``PICIO.write_PIC``.

    Args:
        pdb_file: path of the PDB file to read.
        pic_file: path of the PIC file to write (opened in "w" mode).

    Raises:
        ValueError: if the parsed structure contains no chains.
        NotImplementedError: if the structure contains more than one chain.
    """
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure("pdb", pdb_file)
    chains = list(structure.get_chains())
    if not chains:
        # Previously surfaced as an opaque IndexError from list.pop()
        raise ValueError(f"No chains found in {pdb_file}")
    if len(chains) > 1:
        raise NotImplementedError(
            f"Expected a single chain in {pdb_file}, found {len(chains)}"
        )
    chain = chains[0]  # type Bio.PDB.Chain.Chain

    # Convert to relative angles
    # Calculate dihedrals, angles, bond lengths (internal coordinates) for Atom data
    # Generates atomArray through init_edra
    chain.atom_to_internal_coordinates()

    # NOTE: an earlier revision iterated over
    # chain.internal_coord.ordered_aa_ic_list here, but the loop body only
    # skipped non-alanine residues and did nothing else, so it was removed.
    with open(pic_file, "w") as sink:
        PICIO.write_PIC(chain, sink)
|
||||
|
||||
|
||||
def pic_to_pdb(pic_file: str, pdb_file: str):
    """
    Read in the PIC file and convert it to a PDB file.

    The object returned by ``PICIO.read_PIC`` has its atom coordinates
    rebuilt from internal coordinates, and is then saved to ``pdb_file``
    through ``PDB.PDBIO``.
    """
    with open(pic_file) as source:
        structure = PICIO.read_PIC(source)

    # Rebuild atom coordinates from the internal coordinates just read
    structure.internal_to_atom_coordinates()

    writer = PDB.PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_file)
|
||||
|
||||
|
||||
def coords_to_trrosetta_angles(
|
||||
coords: Union[np.ndarray, Dict[str, List[List[float]]]],
|
||||
) -> Optional[np.ndarray]:
|
||||
@@ -258,47 +208,6 @@ def canonical_distances_and_dihedrals(
|
||||
return pd.DataFrame({k: calc_angles[k].squeeze() for k in distances + angles})
|
||||
|
||||
|
||||
def sample_coords(
    fname: str,
    subset_residues: Optional[Collection[str]] = None,
    query_atoms: Optional[List[str]] = None,
) -> List[pd.DataFrame]:
    """
    Sample the atomic coordinates of residues in a PDB file. Return a list of
    dataframes, one per residue that contains every queried atom.

    We use this to help figure out where to initialize atoms when creating a new chain

    Args:
        fname: path of the PDB file to parse.
        subset_residues: if given, only residues whose name is in this
            collection are considered (e.g. ``{"ALA"}``).
        query_atoms: atom names to extract, in the row order of each returned
            dataframe. Defaults to ``["N", "CA", "C", "O", "CB"]``.

    Returns:
        A list of dataframes indexed by ``query_atoms``, each row holding the
        coordinate returned by ``atom.get_coord()`` for that atom. Residues
        named "HOH" or "NA" and residues missing any queried atom are skipped.
    """
    # None sentinel instead of a mutable list default shared across calls
    if query_atoms is None:
        query_atoms = ["N", "CA", "C", "O", "CB"]

    atomic_coords = []

    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure("", fname)
    for chain in structure.get_chains():
        for res in chain.get_residues():
            resname = res.get_resname()
            if resname in ("HOH", "NA"):
                continue
            if subset_residues is not None and resname not in subset_residues:
                continue

            coords = {atom.get_name(): atom.get_coord() for atom in res.get_atoms()}

            # First queried atom absent from this residue, if any
            missing = next((a for a in query_atoms if a not in coords), None)
            if missing is not None:
                logging.debug(f"{missing} not found in {res.get_resname()}")
                continue

            atomic_coords.append(
                pd.DataFrame([coords[k] for k in query_atoms], index=query_atoms)
            )
    return atomic_coords
|
||||
|
||||
|
||||
def create_new_chain_nerf(
|
||||
out_fname: str,
|
||||
dists_and_angles: pd.DataFrame,
|
||||
|
||||
@@ -296,7 +296,6 @@ class CathCanonicalAnglesDataset(Dataset):
|
||||
f"Computing full dataset of {len(fnames)} with {multiprocessing.cpu_count()} threads"
|
||||
)
|
||||
# Generate dihedral angles
|
||||
# https://biopython.org/docs/1.76/api/Bio.PDB.PDBParser.html
|
||||
pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
|
||||
struct_arrays = pool.map(pfunc, fnames, chunksize=250)
|
||||
pool.close()
|
||||
|
||||
Reference in New Issue
Block a user