Files
rdkit/Python/Dbase/Pubmed/Records.py

131 lines
4.2 KiB
Python
Executable File

# $Id$
#
# Copyright (C) 2003-2006 Rational Discovery LLC
#
# @@ All Rights Reserved @@
#
from elementtree import ElementTree
# check the version of ElementTree. We need at least version 1.2
# in order for the XPath-style parsing stuff to work
import re
vers = re.split("[a-zA-Z]",ElementTree.VERSION)[0]
if vers < '1.2':
raise ImportError,'The PubMed record interface requires a version of ElementTree >= 1.2'
class Record(object):
def __init__(self,element):
for field in self._fieldsOfInterest:
setattr(self,field,'')
self._element = element
def toXML(self):
from cStringIO import StringIO
sio = StringIO()
ElementTree.ElementTree(self._element).write(sio)
return sio.getvalue()
class SummaryRecord(Record):
_fieldsOfInterest=['PubMedId','PubDate','Source','Authors',
'Title','Volume','Issue','Pages','Lang',
'HasAbstract','RecordStatus']
def __init__(self,element):
Record.__init__(self,element)
for item in element.getiterator('Item'):
if item.attrib['Name'] in self._fieldsOfInterest:
setattr(self,item.attrib['Name'],item.text)
if self.PubDate:
self.PubYear = str(self.PubDate).split(' ')[0]
class JournalArticleRecord(Record):
_fieldsOfInterest=['PubMedId','PubYear','Source','Authors',
'Title','Volume','Issue','Pages','Lang',
'Abstract']
def __init__(self,element):
Record.__init__(self,element)
cite = self._element.find('MedlineCitation')
self.PubMedId = cite.findtext('PMID')
article = cite.find('Article')
issue = article.find('Journal/JournalIssue')
self.Volume = issue.findtext('Volume')
self.Issue = issue.findtext('Issue')
self.PubYear = issue.findtext('PubDate/Year')
if not self.PubYear:
txt = issue.findtext('PubDate/MedlineDate')
self.PubYear = txt.split(' ')[0]
self.Title = unicode(article.findtext('ArticleTitle'))
self.Pages = article.findtext('Pagination/MedlinePgn')
abs = article.findtext('Abstract/AbstractText')
if abs:
self.Abstract = unicode(abs)
self.authors = []
tmp = []
for author in article.find('AuthorList').getiterator('Author'):
last = unicode(author.findtext('LastName'))
first = unicode(author.findtext('ForeName'))
initials = unicode(author.findtext('Initials'))
self.authors.append((last,first,initials))
tmp.append('%s %s'%(last,initials))
self.Authors=', '.join(tmp)
journal = cite.findtext('MedlineJournalInfo/MedlineTA')
if journal:
self.Source = unicode(journal)
self.ParseKeywords()
self.ParseChemicals()
def ParseKeywords(self):
self.keywords = []
headings = self.find('MedlineCitation/MeshHeadingList')
if headings:
for heading in headings.getiterator('MeshHeading'):
kw = unicode(heading.findtext('DescriptorName'))
for qualifier in heading.getiterator('QualifierName'):
kw += ' / %s'%(unicode(qualifier.text))
self.keywords.append(kw)
def ParseChemicals(self):
self.chemicals = []
chemicals = self.find('MedlineCitation/ChemicalList')
if chemicals:
for chemical in chemicals.getiterator('Chemical'):
name = chemical.findtext('NameOfSubstance').encode('utf-8')
rn = chemical.findtext('RegistryNumber').encode('utf-8')
if rn != '0':
self.chemicals.append('%s <%s>'%(name,rn))
else:
self.chemicals.append('%s'%(name))
# --------------------------------------------
#
# We'll expose these ElementTree methods in case
# client code wants to pull extra info
#
def getiterator(self,key=None):
if key is not None:
return self._element.getiterator(key)
else:
return self._element.getiterator()
def find(self,key):
return self._element.find(key)
def findtext(self,key):
return self._element.findtext(key)
def findall(self,key):
return self._element.findall(key)
class LinkRecord(Record):
_fieldsOfInterest=[]
def __init__(self,element):
Record.__init__(self,element)
self.PubMedId = self._element.text
nbr = self._element.get('HasNeighbor','N')
if nbr == 'Y':
self.HasNeighbor = 1
else:
self.HasNeighbor = 0