mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-04 21:54:27 +08:00
248 lines
6.4 KiB
Python
Executable File
248 lines
6.4 KiB
Python
Executable File
# $Id$
|
|
#
|
|
# Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC
|
|
#
|
|
# @@ All Rights Reserved @@
|
|
#
|
|
""" contains a class to store parameters for and results from
|
|
Composite building
|
|
|
|
"""
|
|
import RDConfig
|
|
from Dbase.DbConnection import DbConnect
|
|
import DataStructs
|
|
from Dbase import DbModule
|
|
##from ML.SVM import SVMClassificationModel as SVM
|
|
|
|
def SetDefaults(runDetails):
    """ resets every run parameter on a details object to its default value

    **Arguments**

      - runDetails: the object to initialize.  This is normally a
        _CompositeRun.CompositeRun_ instance, but the function only relies
        on being able to assign attributes to it.

    **Returns**

      the same _runDetails_ object that was passed in (it is modified in
      place).

    """
    def _assign(pairs):
        # equivalent to `runDetails.<name> = <value>` for each pair, applied
        # in the order the pairs are listed
        for attrName, attrValue in pairs:
            setattr(runDetails, attrName, attrValue)

    #
    # General run parameters
    #
    _assign((
        ('nRuns', 1),
        ('nModels', 10),
        ('outName', ''),
        ('badName', ''),
        ('splitRun', 0),
        ('splitFrac', 0.7),
        ('lockRandom', 0),
        ('randomActivities', 0),
        ('shuffleActivities', 0),
        ('replacementSelection', 0),
    ))

    #
    # Tree Parameters
    #
    _assign((
        ('useTrees', 1),
        ('pruneIt', 0),
        ('lessGreedy', 0),
        ('limitDepth', -1),
        ('recycleVars', 0),
        ('randomDescriptors', 0),  # toggles growing of random forests
    ))

    #
    # KNN Parameters
    #
    _assign((
        ('useKNN', 0),
        ('knnDistFunc', ''),
        ('knnNeighs', 0),
    ))

    #
    # SigTree Parameters
    #
    _assign((
        ('useSigTrees', 0),
        ('useCMIM', 0),
        ('allowCollections', False),
    ))

    #
    # Naive Bayes Classifier Parameters
    #
    _assign((
        ('useNaiveBayes', 0),
        ('mEstimateVal', -1.0),
        ('useSigBayes', 0),
    ))

    # SVM parameters (useSVM, svmKernel, svmType, svmGamma, svmCost,
    # svmWeights, svmDataType, svmDegree, svmCoeff, svmEps, svmNu, svmCache,
    # svmShrink) are intentionally not set here: the ML.SVM import at the
    # top of this module is commented out.

    #
    # Database access
    #
    _assign((
        ('bayesModel', 0),
        ('dbName', ''),
    ))
    # the credentials are read from RDConfig at call time, at the same point
    # in the sequence as every other default
    runDetails.dbUser = RDConfig.defaultDBUser
    runDetails.dbPassword = RDConfig.defaultDBPassword
    _assign((
        ('dbWhat', '*'),
        ('dbWhere', ''),
        ('dbJoin', ''),
    ))

    #
    # Quantization, screening and bookkeeping
    #
    _assign((
        ('qTableName', ''),
        ('qBounds', []),  # a new list on every call, never shared
        ('qBoundCount', ''),
        ('activityBounds', []),  # a new list on every call, never shared
        ('activityBoundsVals', ''),
        ('detailedRes', 0),
        ('noScreen', 0),
        ('threshold', 0.0),
        ('filterFrac', 0.0),
        ('filterVal', 0.0),
        ('modelFilterVal', 0.0),
        ('modelFilterFrac', 0.0),
        ('internalHoldoutFrac', 0.3),
        ('pickleDataFileName', ''),
        ('startAt', None),
        ('persistTblName', ''),
        ('randomSeed', (23, 42)),
        ('note', ''),
    ))

    return runDetails
|
|
|
|
|
|
class CompositeRun:
    """ class to store parameters for and results from Composite building

     This class has a default set of fields which are added to the database.

     By default these fields are stored in a tuple, so they are immutable.  This
       is probably what you want.

     Each entry of _fields_ is a (column name, SQL type) pair.  _Store()_
     writes one column per entry, taking the value from the instance
     attribute of the same name; entries with no matching attribute are
     left out of the insert.

    """
    # NOTE(review): the column below is spelled "bayesModels" while
    # SetDefaults() in this module initializes an attribute named
    # "bayesModel".  Store() matches names exactly, so confirm which
    # spelling is intended.
    fields = (
        ("rundate", "varchar(32)"),
        ("dbName", "varchar(200)"),
        ("dbWhat", "varchar(200)"),
        ("dbWhere", "varchar(200)"),
        ("dbJoin", "varchar(200)"),
        ("tableName", "varchar(80)"),
        ("note", "varchar(120)"),
        ("shuffled", "smallint"),
        ("randomized", "smallint"),
        ("overall_error", "float"),
        ("holdout_error", "float"),
        ("overall_fraction_dropped", "float"),
        ("holdout_fraction_dropped", "float"),
        ("overall_correct_conf", "float"),
        ("overall_incorrect_conf", "float"),
        ("holdout_correct_conf", "float"),
        ("holdout_incorrect_conf", "float"),
        ("overall_result_matrix", "varchar(256)"),
        ("holdout_result_matrix", "varchar(256)"),
        ("threshold", "float"),
        ("splitFrac", "float"),
        ("filterFrac", "float"),
        ("filterVal", "float"),
        ("modelFilterVal", "float"),
        ("modelFilterFrac", "float"),
        ("nModels", "int"),
        ("limitDepth", "int"),
        ("bayesModels", "int"),
        ("qBoundCount", "varchar(3000)"),
        ("activityBoundsVals", "varchar(200)"),
        ("cmd", "varchar(500)"),
        ("model", DbModule.binaryTypeName),
    )

    def _CreateTable(self, cn, tblName):
        """ *Internal Use only*

        Makes sure that the results table exists and has a column for every
        entry in _fields_.

        **Arguments**

          - cn: the database connection object to work with

          - tblName: name of the results table

        **Notes**

          - if the table is missing it is created with all of the fields;
            otherwise any fields not already present as columns are added
            with _alter table_.

          - table names and column names are compared case-insensitively.

        """
        # a real list (not a lazy map object) so that the membership test
        # behaves the same way under Python 2 and Python 3
        names = [x.strip().upper() for x in cn.GetTableNames()]
        if tblName.upper() not in names:
            curs = cn.GetCursor()
            fmtStr = ','.join(['%s %s' % (name, value)
                               for name, value in self.fields])
            # identifiers cannot be passed as bound parameters, so the
            # statement is assembled with string formatting
            curs.execute('create table %s (%s)' % (tblName, fmtStr))
            cn.Commit()
        else:
            heads = [x.upper() for x in cn.GetColumnNames()]
            curs = cn.GetCursor()
            for name, value in self.fields:
                if name.upper() not in heads:
                    curs.execute('alter table %s add %s %s' % (tblName, name, value))
                    cn.Commit()

    def Store(self, db='models.gdb', table='results',
              user='sysdba', password='masterkey'):
        """ adds the result to a database

        **Arguments**

          - db: name of the database to use

          - table: name of the table to use

          - user&password: connection information

        **Notes**

          - the table is created, or extended with any missing columns,
            before the row is inserted.

          - only those entries of _fields_ for which this object has an
            attribute of the same name are written; the others are omitted
            from the insert statement.

          - values are handed to the cursor as bound parameters using
            _DbModule.placeHolder_; only the table and column names are
            formatted into the SQL text.

        """
        cn = DbConnect(db, table, user, password)
        curs = cn.GetCursor()
        self._CreateTable(cn, table)

        cols = []
        vals = []
        for name, _ in self.fields:
            try:
                v = getattr(self, name)
            except AttributeError:
                # this result was never set on the object; leave the column out
                pass
            else:
                cols.append(name)
                vals.append(v)

        nToDo = len(vals)
        qs = ','.join([DbModule.placeHolder] * nToDo)
        vals = tuple(vals)

        cmd = 'insert into %s (%s) values (%s)' % (table, ','.join(cols), qs)
        curs.execute(cmd, vals)
        cn.Commit()

    def GetDataSet(self, **kwargs):
        """ Returns a MLDataSet pulled from a database using our stored
        values.

        **Arguments**

          - kwargs: any additional keyword arguments are passed through
            unchanged to _DataUtils.DBToData()_

        **Returns**

          whatever _DataUtils.DBToData()_ returns for the stored
          _dbName_, _tableName_, _dbUser_, _dbPassword_, _dbWhat_,
          _dbWhere_ and _dbJoin_ values.

        """
        from ML.Data import DataUtils
        data = DataUtils.DBToData(self.dbName, self.tableName,
                                  user=self.dbUser, password=self.dbPassword,
                                  what=self.dbWhat, where=self.dbWhere,
                                  join=self.dbJoin, **kwargs)

        return data

    def GetDataSetInfo(self, **kwargs):
        """ Returns the column names and types of the data described by our
        stored values.

        **Arguments**

          - kwargs: accepted for symmetry with _GetDataSet()_; currently
            unused.

        **Returns**

          the result of _GetColumnNamesAndTypes()_ on a connection to the
          stored database/table, restricted by the stored _dbJoin_,
          _dbWhat_ and _dbWhere_ values.

        **Notes**

          - when both _dbUser_ and _dbPassword_ are set on this object they
            are used for the connection, so that this method reaches the
            database with the same credentials as _GetDataSet()_.  If either
            is missing the connection falls back to DbConnect's own default
            credentials.

        """
        if hasattr(self, 'dbUser') and hasattr(self, 'dbPassword'):
            conn = DbConnect(self.dbName, self.tableName,
                             self.dbUser, self.dbPassword)
        else:
            conn = DbConnect(self.dbName, self.tableName)
        res = conn.GetColumnNamesAndTypes(join=self.dbJoin, what=self.dbWhat,
                                          where=self.dbWhere)
        return res
|