Give script more descriptive name

This commit is contained in:
Lukas Jarosch
2024-05-05 23:49:22 -07:00
parent e2479cb539
commit 0b5c9492ff

View File

@@ -1,9 +1,10 @@
"""
The RODA database is non-redundant, meaning that it only stores one explicit
representative alignment directory for all PDB chains in a 100% sequence
identity cluster. In order to add explicit alignments for all PDB chains, this
script will add the missing chain directories and symlink them to their
representative alignment directories.
The OpenProteinSet alignment database is non-redundant, meaning that it stores
only one explicit representative alignment directory for each cluster of PDB
chains with 100% sequence identity. To provide explicit alignments for all PDB
chains, this script adds the missing chain directories and symlinks them to
their representative alignment directories. This is required in order to train
OpenFold on the full PDB, not just one representative chain per cluster.
"""
from argparse import ArgumentParser
@@ -52,6 +53,9 @@ def main(alignment_dir: Path, duplicate_chains_file: Path):
with open(duplicate_chains_file, "r") as fp:
duplicate_chains = [list(line.strip().split()) for line in fp]
# convert to absolute path for symlink creation
alignment_dir = alignment_dir.resolve()
create_duplicate_dirs(duplicate_chains, alignment_dir)