mirror of
https://github.com/rdk/p2rank.git
synced 2026-06-04 12:44:24 +08:00
add implementation of transform/reduce-to-chains command
This commit is contained in:
BIN
distro/test_data/1fbl.cif.gz
Normal file
BIN
distro/test_data/1fbl.cif.gz
Normal file
Binary file not shown.
BIN
distro/test_data/2W83.cif.gz
Normal file
BIN
distro/test_data/2W83.cif.gz
Normal file
Binary file not shown.
BIN
distro/test_data/2W83.pdb.gz
Normal file
BIN
distro/test_data/2W83.pdb.gz
Normal file
Binary file not shown.
@@ -262,6 +262,34 @@ analyze() {
|
||||
|
||||
}
|
||||
|
||||
transform() {

    title TRANSFORM COMMANDS

    # Runs the same ten reduce-to-chains invocations for each test structure (2W83 first, then 1fbl).
    # Trailing comments give the expected output file, where <id> is the structure id of the iteration.
    local struct_id in_base
    for struct_id in 2W83 1fbl; do
        in_base="distro/test_data/${struct_id}"

        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif" -chains A                             # output: <out_dir>/<id>_A.cif
        test ./prank.sh transform reduce-to-chains -f "${in_base}.pdb" -chains A                             # output: <out_dir>/<id>_A.pdb
        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif.gz" -chains A,B                        # output: <out_dir>/<id>_A,B.cif.gz
        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif.gz" -chains A,B -out_file "distro/test_output/${struct_id}_A,B.cif.gz"   # output: distro/test_output/<id>_A,B.cif.gz
        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif" -chains keep                          # output: <out_dir>/<id>.cif
        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif" -chains keep -out_format pdb.gz       # output: <out_dir>/<id>.pdb.gz
        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif" -chains all                           # output: <out_dir>/<id>_all.cif
        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif" -chains A -out_format keep            # output: <out_dir>/<id>_A.cif
        test ./prank.sh transform reduce-to-chains -f "${in_base}.cif.gz" -chains A -out_format pdb.gz       # output: <out_dir>/<id>_A.pdb.gz
        test ./prank.sh transform reduce-to-chains -f "${in_base}.pdb.gz" -chains A,B -out_format cif        # output: <out_dir>/<id>_A,B.cif
    done

}
|
||||
|
||||
classifiers() {
|
||||
|
||||
title TRAIN/EVAL USING DIFFERENT CLASSIFIERS
|
||||
@@ -283,6 +311,8 @@ feature_importances() {
|
||||
test ./prank.sh traineval -t chen11-fpocket.ds -e joined.ds -c config/train-default -feature_importances 1 -classifier FasterForest2 -label FF2 -loop 1 -cache_datasets 0 -out_subdir TEST/IMPORTANCES
|
||||
}
|
||||
|
||||
|
||||
|
||||
###################################################################################################################
|
||||
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ Analyze a dataset with an explicitly specified residue labeling.
|
||||
## Reduce structure to chains
|
||||
|
||||
~~~sh
|
||||
./prank.sh analyze reduce-to-chains -f <structure_file> -chains <chain_names> -out_format <format_file_extension>
|
||||
./prank.sh transform reduce-to-chains -f <structure_file> -chains <chain_names> -out_format <format_file_extension> -out_file <file_name>
|
||||
~~~
|
||||
* `-f <>` required, structure file in one of the formats `pdb|pdb.gz|cif|cif.gz`
|
||||
* `-chains` required, comma-separated list of chain names, wildcards: `keep`, `all`
|
||||
@@ -69,21 +69,24 @@ Analyze a dataset with an explicitly specified residue labeling.
|
||||
* `*` is not the same as keeping structure as is, but runs the reduction procedure with all the chains, useful for debugging
|
||||
* `-out_format` optional, default value is `keep` -- use the same format as the input
|
||||
* possible values: `keep|pdb|pdb.gz|cif|cif.gz`
|
||||
|
||||
* `-out_file` optional, output structure file name, path relative to the shell working directory
|
||||
* if specified, the reduced structure is saved under the specified name and no other output is produced
|
||||
* if not specified, default name is generated (see examples) and file is saved in the output directory specified with parameters `-o`, `-output_base_dir`, `-out_subdir`
|
||||
|
||||
Examples:
|
||||
~~~sh
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.cif -chains A # output file: 2W83_A.cif
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.cif.gz -chains A,B # output file: 2W83_A,B.cif.gz
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.cif -chains keep # output file: 2W83.cif
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.cif -chains keep -out_format pdb.gz # output file: 2W83.pdb.gz
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.cif -chains all # output file: 2W83_all.cif
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.cif -chains A -out_format keep # output file: 2W83_A.cif
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.cif.gz -chains A -out_format pdb.gz # output file: 2W83_A.pdb.gz
|
||||
./prank.sh analyze reduce-to-chains -f 2W83.pdb.gz -chains A,B -out_format cif # output file: 2W83_A,B.cif
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif -chains A # output: <out_dir>/2W83_A.cif
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.pdb -chains A # output: <out_dir>/2W83_A.pdb
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif.gz -chains A,B # output: <out_dir>/2W83_A,B.cif.gz
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif.gz -chains A,B -out_file distro/test_output/2W83_A,B.cif.gz # output: distro/test_output/2W83_A,B.cif.gz
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif -chains keep # output: <out_dir>/2W83.cif
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif -chains keep -out_format pdb.gz # output: <out_dir>/2W83.pdb.gz
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif -chains all # output: <out_dir>/2W83_all.cif
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif -chains A -out_format keep # output: <out_dir>/2W83_A.cif
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.cif.gz -chains A -out_format pdb.gz # output: <out_dir>/2W83_A.pdb.gz
|
||||
./prank.sh transform reduce-to-chains -f distro/test_data/2W83.pdb.gz -chains A,B -out_format cif # output: <out_dir>/2W83_A,B.cif
|
||||
~~~
|
||||
|
||||
|
||||
## Print
|
||||
|
||||
|
||||
|
||||
@@ -369,10 +369,10 @@ class Protein implements Parametrized {
|
||||
/**
 * Saves the structure of this protein in PDB format.
 *
 * @param fileName target file path; ".gz" is appended to it when compressed is true
 * @param compressed whether the output should be gzip-compressed
 * @return the file name that was actually used (including the appended ".gz", if any)
 */
String saveToPdbFile(String fileName, boolean compressed = false) {
    if (compressed) {
        fileName += ".gz"
    }

    // Write exactly once: PdbUtils.saveToFile already picks the gzip or plain writer,
    // so a separate Futils.writeGzip/writeFile call would only write the same file twice.
    PdbUtils.saveToFile(structure, "pdb", fileName, compressed)

    return fileName
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ import cz.siret.prank.program.params.Params
|
||||
import cz.siret.prank.program.routines.Routine
|
||||
import cz.siret.prank.program.routines.analyze.AnalyzeRoutine
|
||||
import cz.siret.prank.program.routines.analyze.PrintRoutine
|
||||
import cz.siret.prank.program.routines.analyze.TransformRoutine
|
||||
import cz.siret.prank.program.routines.predict.PredictResiduesRoutine
|
||||
import cz.siret.prank.program.routines.predict.PredictRoutine
|
||||
import cz.siret.prank.program.routines.predict.RescoreRoutine
|
||||
@@ -339,6 +340,10 @@ class Main implements Parametrized, Writable {
|
||||
new AnalyzeRoutine(args, this).execute()
|
||||
}
|
||||
|
||||
/** Builds a TransformRoutine from the command line args and this Main instance and executes it. */
private runTransform() {
    TransformRoutine routine = new TransformRoutine(args, this)
    routine.execute()
}
|
||||
|
||||
/** Builds a PrintRoutine from the command line args and this Main instance and executes it. */
private runPrint() {
    PrintRoutine routine = new PrintRoutine(args, this)
    routine.execute()
}
|
||||
@@ -393,6 +398,8 @@ class Main implements Parametrized, Writable {
|
||||
break
|
||||
case 'analyze': runAnalyze()
|
||||
break
|
||||
case 'transform': runTransform()
|
||||
break
|
||||
case 'print': runPrint()
|
||||
break
|
||||
case 'run': runExperiment(args.unnamedArgs[0])
|
||||
|
||||
@@ -909,7 +909,7 @@ class Params {
|
||||
|
||||
/**
|
||||
* Timestamp that will be added as a prefix to each message printed to stdout ("" = no timestamp)
|
||||
* Example: "yyyy.MM.dd HHmm:"
|
||||
* Example: "yyyy.MM.dd HH:mm:"
|
||||
*/
|
||||
@RuntimeParam
|
||||
String stdout_timestamp = ""
|
||||
@@ -1185,6 +1185,20 @@ class Params {
|
||||
@ModelParam
|
||||
boolean feat_csv_ignore_missing = false
|
||||
|
||||
|
||||
/**
|
||||
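 * Chains to retain, used by the transform reduce-to-chains command:
 * a comma-separated list of chain names, "keep" (leave the structure as is)
 * or "all" (run the reduction with all the chains).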
|
||||
*/
|
||||
@RuntimeParam
|
||||
String chains = "keep"
|
||||
|
||||
@RuntimeParam
|
||||
String out_format = "keep"
|
||||
|
||||
@RuntimeParam
|
||||
String out_file = null
|
||||
|
||||
|
||||
//===========================================================================================================//
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
package cz.siret.prank.program.routines.analyze
|
||||
|
||||
import com.google.common.base.Splitter
|
||||
import com.google.common.collect.ImmutableMap
|
||||
import cz.siret.prank.domain.*
|
||||
import cz.siret.prank.domain.labeling.*
|
||||
import cz.siret.prank.domain.loaders.LoaderParams
|
||||
import cz.siret.prank.export.FastaExporter
|
||||
import cz.siret.prank.geom.Atoms
|
||||
import cz.siret.prank.geom.Struct
|
||||
import cz.siret.prank.program.Main
|
||||
import cz.siret.prank.program.PrankException
|
||||
import cz.siret.prank.program.rendering.PymolRenderer
|
||||
import cz.siret.prank.program.rendering.RenderingModel
|
||||
import cz.siret.prank.program.routines.Routine
|
||||
import cz.siret.prank.utils.BinCounter
|
||||
import cz.siret.prank.utils.CmdLineArgs
|
||||
import cz.siret.prank.utils.Futils
|
||||
import cz.siret.prank.utils.PdbUtils
|
||||
import cz.siret.prank.utils.Sutils
|
||||
import groovy.transform.CompileStatic
|
||||
import groovy.util.logging.Slf4j
|
||||
import org.biojava.nbio.structure.ResidueNumber
|
||||
import org.biojava.nbio.structure.Structure
|
||||
|
||||
import static cz.siret.prank.geom.SecondaryStructureUtils.assignSecondaryStructure
|
||||
import static cz.siret.prank.utils.Cutils.newSynchronizedList
|
||||
import static cz.siret.prank.utils.Formatter.format
|
||||
import static cz.siret.prank.utils.Futils.mkdirs
|
||||
import static cz.siret.prank.utils.Futils.writeFile
|
||||
|
||||
/**
 * Tools for transforming structure files.
 * Routine with sub-commands; the only sub-command registered here is "reduce-to-chains".
 */
@Slf4j
@CompileStatic
class TransformRoutine extends Routine {

    /** Values accepted by the out_format parameter. */
    private static final List<String> VALID_OUT_FORMATS =
            Collections.unmodifiableList(["keep", "pdb", "pdb.gz", "cif", "cif.gz"])

    String subCommand
    String label
    Dataset dataset
    CmdLineArgs args

    /**
     * Pops the sub-command name from the unnamed command line args, checks that it is registered,
     * loads the dataset (or single file) through main and derives the output directory.
     *
     * @throws PrankException if the sub-command is not registered in commandRegister
     */
    TransformRoutine(CmdLineArgs args, Main main) {
        super(null)

        this.args = args

        subCommand = args.popFirstUnnamedArg()  // next if present should be dataset
        if (!commandRegister.containsKey(subCommand)) {
            write "Invalid transform sub-command '$subCommand'! Available commands: " + commandRegister.keySet()
            throw new PrankException("Invalid command.")
        }

        dataset = main.loadDatasetOrFile()

        label = "transform_" + subCommand + "_" + dataset.label
        outdir = main.findOutdir(label)
    }

    /** Executes the sub-command selected in the constructor. */
    void execute() {
        write "executing transform $subCommand command"

        commandRegister.get(subCommand).call()
    }

//===========================================================================================================//
// Sub-Commands
//===========================================================================================================//

    /** Maps sub-command names to the closures that run them. */
    final Map<String, Closure> commandRegister = ImmutableMap.copyOf([
            "reduce-to-chains" : { cmdReduceToChains() }
    ])

//===========================================================================================================//

    /**
     * chain label = "<author_id>(<mmcif_id>)"
     */
    private List<String> chainLabels(Structure structure) {
        return structure.chains.collect { Struct.getAuthorId(it) + "(" + Struct.getMmcifId(it) + ")" }
    }

    /** Sorted, de-duplicated author ids of all chains in the structure. */
    private List<String> authorChainIds(Structure structure) {
        return structure.chains.collect { Struct.getAuthorId(it) }.toUnique().toSorted()
    }

    /**
     * @throws PrankException if outFormatParam is not one of VALID_OUT_FORMATS
     */
    private static void checkOutFormat(String outFormatParam) {
        if (!(outFormatParam in VALID_OUT_FORMATS)) {
            throw new PrankException("Invalid value of out_format param: '$outFormatParam'. Valid values: $VALID_OUT_FORMATS")
        }
    }

    /**
     * Returns the explicitly requested output path, or builds the default one inside outdir.
     * In the default case the output directory is created and the params are written to it.
     */
    private String resolveOutFilePath(String outFileParam, String outFileBaseName, String outExt) {
        if (outFileParam != null) {
            return outFileParam
        }

        mkdirs(outdir)
        writeParams(outdir)
        return outdir + "/" + outFileBaseName + "." + outExt
    }

    /**
     * Implements "transform reduce-to-chains".
     *
     * Loads the structure file given by -f, optionally reduces it to the chains selected by the
     * chains param ("keep" = no reduction, "all" = reduction with all author chain ids, otherwise
     * a comma-separated list) and saves the result in the format selected by the out_format param
     * ("keep" = same extension and compression as the input file).
     * The output goes to the out_file param if set, otherwise to
     * <outdir>/<input base name>[_<chains>].<extension>.
     *
     * @throws PrankException if out_format has an invalid value or no input file was given with -f
     */
    private void cmdReduceToChains() {
        String file = args.get("f")
        String outFormatParam = params.out_format
        String outFileParam = params.out_file
        String chainsParam = params.chains

        checkOutFormat(outFormatParam)

        // Fail with a clear message instead of passing a null path to the file utilities below.
        if (file == null || file.isEmpty()) {
            throw new PrankException("reduce-to-chains requires a structure file specified with the -f parameter.")
        }

        write "processing file [${Futils.absPath(file)}]"

        Structure structure = PdbUtils.loadFromFile(file)
        String baseFileName = Futils.baseName(file)

        write "chains: " + chainLabels(structure)
        write "atoms: " + Atoms.allFromStructure(structure).count

        String outFileBaseName  // without extension
        if (chainsParam == "keep") {
            write "keeping the structure as is / not reducing to chains"
            outFileBaseName = baseFileName
        } else {
            List<String> newChains
            String nameSuffix
            if (chainsParam == "all") {
                write "selecting all the chains"
                newChains = authorChainIds(structure)
                nameSuffix = "all"
            } else {
                newChains = Sutils.split(chainsParam, ",")
                nameSuffix = newChains.join(",")
            }
            outFileBaseName = baseFileName + "_" + nameSuffix

            write "reducing to chains: " + newChains

            structure = PdbUtils.reduceStructureToChains(structure, newChains)
            write "chains (after reduction): " + chainLabels(structure)
            write "atoms (after reduction): " + Atoms.allFromStructure(structure).count
        }

        boolean compress
        String outFormat
        String outExt
        if (outFormatParam == "keep") {
            // Keep the input's extension and compression; ".ent" files are written as PDB.
            // NOTE(review): any other extension that is not "cif" is passed on unchanged to
            // PdbUtils.saveToFile -- confirm how it should be handled.
            String inputExt = Futils.realExtension(file)
            compress = Futils.isCompressed(file)
            outExt = inputExt + (compress ? ".gz" : "")
            outFormat = (inputExt == "ent") ? "pdb" : inputExt
        } else {
            compress = Futils.isCompressed(outFormatParam)
            outFormat = Sutils.removeSuffix(outFormatParam, ".gz")
            outExt = outFormat + (compress ? ".gz" : "")
        }

        String outFilePath = resolveOutFilePath(outFileParam, outFileBaseName, outExt)

        write "Output file: " + Futils.absPath(outFilePath)

        PdbUtils.saveToFile(structure, outFormat, outFilePath, compress)
    }

}
|
||||
@@ -110,6 +110,29 @@ class PdbUtils {
|
||||
return struc
|
||||
}
|
||||
|
||||
/**
 * Serializes the structure to text and writes it to a file.
 *
 * @param structure structure to save
 * @param format "cif" selects mmCIF output (structure.toMMCIF()); any other value, e.g. "pdb",
 *               produces PDB output (structure.toPDB())
 * @param fileName path of the file to write; it is used as given
 * @param compressed - if true the content is written with Futils.writeGzip, otherwise with Futils.writeFile
 * @return file name used (the fileName argument)
 */
static String saveToFile(Structure structure, String format, String fileName, boolean compressed = false) {
    boolean asCif = (format == "cif")
    String text = asCif ? structure.toMMCIF() : structure.toPDB()

    if (!compressed) {
        Futils.writeFile fileName, text
        return fileName
    }

    Futils.writeGzip fileName, text
    return fileName
}
|
||||
|
||||
//===========================================================================================================//
|
||||
|
||||
static String correctResidueCode(String residueCode) {
|
||||
//MSE is only found as a molecular replacement for MET
|
||||
|
||||
Reference in New Issue
Block a user