# Mirror of https://github.com/rdkit/rdkit.git
# Synced 2026-06-04 21:54:27 +08:00
# 235 lines, 5.2 KiB, Python, executable file
# $Id$
|
|
#
|
|
# Copyright (C) 2003-2006 Rational Discovery LLC
|
|
#
|
|
# @@ All Rights Reserved @@
|
|
#
|
|
""" Various storage (molecular and otherwise) functionality
|
|
|
|
"""
|
|
import RDConfig
|
|
from Dbase import DbModule
|
|
from Dbase.DbConnection import DbConnect
|
|
|
|
def ValidateRDId(id):
    """ returns whether or not an RDId is valid

    The id is split on '-' (underscores are treated as dashes first). It
    must have at least four fields: the lead text, two or more digit
    blocks and a trailing checksum field.  The id is valid when the sum of
    every digit in the blocks, modulo 10, equals the checksum.  The lead
    text itself is not inspected.

    Arguments:
      - id: the RDId string to check

    Returns:
      a true value when the id is valid, a false value (0 or False)
      otherwise.  Malformed ids (too few fields, non-digit characters in a
      block or in the checksum) are reported as invalid, never raised.

    >>> ValidateRDId('RDCmpd-000-009-9')
    1
    >>> ValidateRDId('RDCmpd-009-000-009-8')
    1
    >>> ValidateRDId('RDCmpd-009-000-109-8')
    0
    >>> ValidateRDId('bogus')
    0
    >>> ValidateRDId('RDCmpd-000-009-x')
    0

    """
    fields = id.replace('_', '-').split('-')
    if len(fields) < 4:
        return 0

    try:
        accum = 0
        for entry in fields[1:-1]:
            for char in entry:
                accum += int(char)
        # the checksum field must be numeric too; a non-numeric checksum
        # means "invalid", not an exception escaping from a validator
        crc = int(fields[-1])
    except ValueError:
        return 0

    return accum % 10 == crc
|
|
|
|
def RDIdToInt(id, validate=1):
    """ Returns the integer index for a given RDId

    Throws a ValueError on error

    Arguments:
      - id: the RDId string; underscores are accepted in place of dashes
      - validate: if true (the default), the id is first checked with
        ValidateRDId() and a ValueError is raised when that check fails.
        If false, the checksum is not verified: the fields between the
        lead text and the final field are simply read as base-1000 digits,
        so an id without any such fields yields 0 and a non-numeric field
        raises the ValueError produced by int().

    >>> RDIdToInt('RDCmpd-000-009-9')
    9
    >>> RDIdToInt('RDCmpd-009-000-009-8')
    9000009
    >>> RDIdToInt('RDData_000_009_9')
    9
    >>> try:
    ...   RDIdToInt('RDCmpd-009-000-109-8')
    ... except ValueError:
    ...   print('ok')
    ... else:
    ...   print('failed')
    ok
    >>> try:
    ...   RDIdToInt('bogus')
    ... except ValueError:
    ...   print('ok')
    ... else:
    ...   print('failed')
    ok

    """
    if validate and not ValidateRDId(id):
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError("Bad RD Id")

    # drop the lead text (first field) and the checksum (last field)
    terms = id.replace('_', '-').split('-')[1:-1]

    # most significant block first, each block is one base-1000 "digit"
    res = 0
    for term in terms:
        res = res * 1000 + int(term)
    return res
|
|
|
|
|
|
def IndexToRDId(idx, leadText='RDCmpd'):
    """ Converts an integer index into an RDId

    The format of the ID is:
      leadText-xxx-xxx-xxx-y
    The number blocks are zero padded and the final digit (y)
    is a checksum (the sum of the decimal digits of idx, modulo 10):
    >>> str(IndexToRDId(9))
    'RDCmpd-000-009-9'
    >>> str(IndexToRDId(9009))
    'RDCmpd-009-009-8'

    A millions block is included if it's nonzero:
    >>> str(IndexToRDId(9000009))
    'RDCmpd-009-000-009-8'

    The millions block is padded to at least three digits but is not
    capped, so it grows wider for indices of 10**9 and above.

    The text at the beginning can be altered:
    >>> str(IndexToRDId(9,leadText='RDAlt'))
    'RDAlt-000-009-9'

    Negative indices are errors:
    >>> try:
    ...   IndexToRDId(-1)
    ... except ValueError:
    ...   print('ok')
    ... else:
    ...   print('failed')
    ok

    Arguments:
      - idx: the non-negative integer index to encode
      - leadText: the text placed before the first dash

    Returns:
      the RDId string

    Raises:
      ValueError if idx is negative

    """
    if idx < 0:
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError('indices must be >= zero')

    # pure integer arithmetic: going through floats (1e6) silently loses
    # precision once the index no longer fits in a double's mantissa
    millions, remainder = divmod(idx, 1000000)
    thousands, units = divmod(remainder, 1000)

    res = leadText + '-'
    if millions:
        res += '%03d-' % millions
    res += '%03d-%03d-' % (thousands, units)

    # checksum: digit sum of the index, modulo 10
    accum = 0
    for char in str(idx):
        accum += int(char)

    return res + str(accum % 10)
|
|
|
|
def GetNextId(conn, table, idColName='Id'):
    """ returns the next available Id in the database

    The idColName column of every row returned by conn.GetData() is
    converted with RDIdToInt() (without validation); the result is one
    more than the largest value seen, or 1 if no rows are returned.

    see RegisterItem for testing/documentation

    """
    highest = 0
    for row in conn.GetData(table=table, fields=idColName):
        current = RDIdToInt(row[0], validate=0)
        if current > highest:
            highest = current
    return highest + 1
|
|
|
|
def GetNextRDId(conn, table, idColName='Id', leadText=''):
    """ returns the next available RDId in the database

    If leadText is empty, it is taken from the id in the first row
    returned by conn.GetData(): the text before the first dash, with
    underscores treated as dashes.  The table must therefore already
    contain at least one row in that case (indexing the first row is
    presumably an IndexError on an empty result -- confirm against
    DbConnect.GetData).

    see RegisterItem for testing/documentation

    """
    if not leadText:
        firstId = conn.GetData(table=table, fields=idColName)[0][0]
        leadText = firstId.replace('_', '-').split('-')[0]

    nextIdx = GetNextId(conn, table, idColName=idColName)
    return IndexToRDId(nextIdx, leadText=leadText)
|
|
|
|
def RegisterItem(conn, table, value, columnName, data=None,
                 id='', idColName='Id', leadText='RDCmpd'):
    """ registers an item in a table unless it is already present

    The table is searched for a row whose columnName equals value.

    Arguments:
      - conn: the database connection to use
      - table: the name of the table
      - value: the value to look for
      - columnName: the column in which to look for value
      - data: (optional) the remaining column values of the new row; the
        row is only inserted (and committed) when this is non-empty
      - id: (optional) the id to use for a new row; if empty, the next
        RDId is obtained from GetNextRDId()
      - idColName: the name of the id column
      - leadText: the lead text passed to GetNextRDId()

    Returns:
      a 2-tuple:
        - (0, existing id) if a matching row was found
        - (1, new id) otherwise

    >>> dbName = RDConfig.RDTestDatabase
    >>> conn = DbConnect(dbName)
    >>> tblName = 'StorageTest'
    >>> conn.AddTable(tblName,'id varchar(32) not null primary key,label varchar(40),val int')
    >>> RegisterItem(conn,tblName,'label1','label',['label1',1])==(1, 'RDCmpd-000-001-1')
    True
    >>> RegisterItem(conn,tblName,'label2','label',['label2',1])==(1, 'RDCmpd-000-002-2')
    True
    >>> RegisterItem(conn,tblName,'label1','label',['label1',1])==(0, 'RDCmpd-000-001-1')
    True
    >>> str(GetNextRDId(conn,tblName))
    'RDCmpd-000-003-3'
    >>> tuple(conn.GetData(table=tblName)[0])==('RDCmpd-000-001-1', 'label1', 1)
    True

    It's also possible to provide ids by hand:
    >>> RegisterItem(conn,tblName,'label10','label',['label10',1],id='RDCmpd-000-010-1')==(1, 'RDCmpd-000-010-1')
    True
    >>> str(GetNextRDId(conn,tblName))
    'RDCmpd-000-011-2'

    """
    cursor = conn.GetCursor()
    # identifiers are interpolated; the looked-up value goes through the
    # DB module's placeholder
    lookup = 'select %s from %s where %s=%s' % (idColName, table, columnName,
                                                DbModule.placeHolder)
    cursor.execute(lookup, (value,))
    existing = cursor.fetchone()
    if existing:
        # already registered: hand back the id that is stored
        return 0, existing[0]

    if not id:
        id = GetNextRDId(conn, table, idColName=idColName, leadText=leadText)

    if data:
        newRow = [id]
        newRow.extend(data)
        conn.InsertData(table, newRow)
        conn.Commit()

    return 1, id
|
|
|
|
|
|
|
|
|
|
|
|
#------------------------------------
|
|
#
|
|
# doctest boilerplate
|
|
#
|
|
# extra doctests: ids produced by IndexToRDId() must validate and must
# convert back to the index they were built from
_roundtripTests = """
>>> ValidateRDId(IndexToRDId(100))
1
>>> ValidateRDId(IndexToRDId(10000,leadText='foo'))
1
>>> indices = [1,100,1000,1000000]
>>> vals = []
>>> for idx in indices:
...   vals.append(RDIdToInt(IndexToRDId(idx)))
>>> vals == indices
1

"""
# doctest picks up additional test strings from the module-level __test__
__test__ = dict(roundtrip=_roundtripTests)
|
|
|
|
def _test():
    """ runs the doctests of the module currently executing as __main__

    Returns whatever doctest.testmod() returns (a failed/attempted pair).

    """
    import sys
    import doctest
    mainModule = sys.modules["__main__"]
    return doctest.testmod(mainModule)
|
|
|
|
if __name__ == '__main__':
    import sys
    # _test() yields (failures, tests attempted); the number of failed
    # doctests becomes the process exit status
    results = _test()
    sys.exit(results[0])
|
|
|