mirror of
https://github.com/rdkit/rdkit.git
synced 2026-06-07 22:44:25 +08:00
131 lines
4.2 KiB
Python
Executable File
131 lines
4.2 KiB
Python
Executable File
# $Id$
|
|
#
|
|
# Copyright (C) 2003-2006 Rational Discovery LLC
|
|
#
|
|
# @@ All Rights Reserved @@
|
|
#
|
|
from elementtree import ElementTree
|
|
# check the version of ElementTree. We need at least version 1.2
|
|
# in order for the XPath-style parsing stuff to work
|
|
import re
|
|
def _versionTuple(text):
    """Convert the leading dotted-number part of *text* to a tuple of ints.

    '1.10' -> (1, 10), '1.2.6' -> (1, 2, 6).  Anything that does not start
    with a digit yields the empty tuple, which sorts below every real version.
    """
    match = re.match(r'\d+(?:\.\d+)*', text)
    if match is None:
        return ()
    return tuple([int(piece) for piece in match.group(0).split('.')])


# Keep only what precedes the first letter of the version string, so that a
# suffix such as an alpha/beta/candidate tag is ignored.
vers = re.split("[a-zA-Z]", ElementTree.VERSION)[0]
# Compare numerically: a plain string comparison would order '1.10' before
# '1.2' and wrongly reject a newer release.
if _versionTuple(vers) < (1, 2):
    raise ImportError('The PubMed record interface requires a version of ElementTree >= 1.2')
|
|
|
|
|
|
class Record(object):
    """Base class wrapping a single ElementTree element.

    Subclasses list the attribute names they populate in
    ``_fieldsOfInterest``.  Each of those attributes is initialised to an
    empty string, so it exists on the instance even if the subclass never
    assigns it.
    """
    # Empty default so the base class can be instantiated directly; without
    # it __init__ fails with AttributeError on the lookup below.
    _fieldsOfInterest = ()

    def __init__(self, element):
        """Blank every field of interest, then store *element*.

        Arguments:
          - element: the element to wrap; it is kept as ``self._element``
            and is not inspected here.
        """
        for field in self._fieldsOfInterest:
            setattr(self, field, '')
        self._element = element

    def toXML(self):
        """Return the wrapped element serialised as XML text.

        The element is wrapped in an ElementTree and written to an in-memory
        buffer; the buffer contents are returned.
        """
        from cStringIO import StringIO
        sio = StringIO()
        ElementTree.ElementTree(self._element).write(sio)
        return sio.getvalue()
|
|
|
|
class SummaryRecord(Record):
    """Record populated from the named ``Item`` elements of a summary element.

    Every ``Item`` whose ``Name`` attribute appears in ``_fieldsOfInterest``
    is copied onto the instance as an attribute of that name, using the
    item's text as the value.  ``PubYear`` is derived from ``PubDate``.
    """
    _fieldsOfInterest = ['PubMedId', 'PubDate', 'Source', 'Authors',
                         'Title', 'Volume', 'Issue', 'Pages', 'Lang',
                         'HasAbstract', 'RecordStatus']

    def __init__(self, element):
        """Read the fields of interest out of *element*.

        Arguments:
          - element: an element supporting ``getiterator('Item')``; each
            item yielded must provide ``attrib`` and ``text``.
        """
        Record.__init__(self, element)
        # Define PubYear up front so the attribute exists even when the
        # summary carries no PubDate.
        self.PubYear = ''
        for item in element.getiterator('Item'):
            # .get() so an Item without a Name attribute is skipped
            # instead of raising KeyError.
            name = item.attrib.get('Name')
            if name in self._fieldsOfInterest:
                # NOTE: item.text is stored as-is, so a matching Item with
                # no text replaces the '' default with None.
                setattr(self, name, item.text)
        if self.PubDate:
            # The year is taken to be the first space-separated token.
            self.PubYear = str(self.PubDate).split(' ')[0]
|
|
|
|
class JournalArticleRecord(Record):
    """Record built from an element that has a ``MedlineCitation`` child.

    Besides the string fields named in ``_fieldsOfInterest`` the constructor
    also sets:
      - authors: list of (last name, fore name, initials) tuples
      - keywords: list of strings built by ParseKeywords()
      - chemicals: list of strings built by ParseChemicals()

    Fields of interest that the constructor does not assign keep the empty
    string given to them by Record.__init__.
    """
    _fieldsOfInterest = ['PubMedId', 'PubYear', 'Source', 'Authors',
                         'Title', 'Volume', 'Issue', 'Pages', 'Lang',
                         'Abstract']

    def __init__(self, element):
        """Extract the article data from *element*.

        Arguments:
          - element: the element to wrap.  It must contain
            ``MedlineCitation/Article/Journal/JournalIssue``; the author
            list, abstract and journal abbreviation are optional.
        """
        Record.__init__(self, element)

        cite = self._element.find('MedlineCitation')
        self.PubMedId = cite.findtext('PMID')
        article = cite.find('Article')
        issue = article.find('Journal/JournalIssue')
        self.Volume = issue.findtext('Volume')
        self.Issue = issue.findtext('Issue')
        self.PubYear = issue.findtext('PubDate/Year')
        if not self.PubYear:
            # No explicit year: use the first token of the free-form date,
            # or an empty string when that is missing as well.
            medlineDate = issue.findtext('PubDate/MedlineDate')
            if medlineDate:
                self.PubYear = medlineDate.split(' ')[0]
            else:
                self.PubYear = ''
        self.Title = unicode(article.findtext('ArticleTitle'))
        self.Pages = article.findtext('Pagination/MedlinePgn')
        abstractText = article.findtext('Abstract/AbstractText')
        if abstractText:
            self.Abstract = unicode(abstractText)

        self.authors = []
        shortNames = []
        authorList = article.find('AuthorList')
        # find() returns None when the article has no author list at all.
        if authorList is not None:
            for author in authorList.getiterator('Author'):
                # Default to '' so a missing name part does not end up as
                # the literal text 'None'.
                last = unicode(author.findtext('LastName', ''))
                first = unicode(author.findtext('ForeName', ''))
                initials = unicode(author.findtext('Initials', ''))
                self.authors.append((last, first, initials))
                shortName = ' '.join([part for part in (last, initials)
                                      if part])
                if shortName:
                    shortNames.append(shortName)
        self.Authors = ', '.join(shortNames)

        journal = cite.findtext('MedlineJournalInfo/MedlineTA')
        if journal:
            self.Source = unicode(journal)

        self.ParseKeywords()
        self.ParseChemicals()

    def ParseKeywords(self):
        """Fill ``self.keywords`` from ``MedlineCitation/MeshHeadingList``.

        Each MeshHeading yields one string: its DescriptorName followed by
        ' / <qualifier>' for every QualifierName it contains.  The list is
        left empty when there is no heading list.
        """
        self.keywords = []
        headings = self.find('MedlineCitation/MeshHeadingList')
        # Compare with None explicitly: the truth value of an element
        # reflects its number of children, not whether it was found.
        if headings is not None:
            for heading in headings.getiterator('MeshHeading'):
                kw = unicode(heading.findtext('DescriptorName'))
                for qualifier in heading.getiterator('QualifierName'):
                    kw += ' / %s' % (unicode(qualifier.text))
                self.keywords.append(kw)

    def ParseChemicals(self):
        """Fill ``self.chemicals`` from ``MedlineCitation/ChemicalList``.

        Each Chemical yields 'name <registry number>', or just the name when
        the registry number is '0'.  Both values are UTF-8 encoded first.
        The list is left empty when there is no chemical list.
        """
        self.chemicals = []
        chemicals = self.find('MedlineCitation/ChemicalList')
        # See ParseKeywords: test for presence, not for child count.
        if chemicals is not None:
            for chemical in chemicals.getiterator('Chemical'):
                name = chemical.findtext('NameOfSubstance').encode('utf-8')
                rn = chemical.findtext('RegistryNumber').encode('utf-8')
                if rn != '0':
                    self.chemicals.append('%s <%s>' % (name, rn))
                else:
                    self.chemicals.append('%s' % (name))

    # --------------------------------------------
    #
    #  We'll expose these ElementTree methods in case
    #  client code wants to pull extra info
    #
    def getiterator(self, key=None):
        """Forward to the wrapped element's getiterator().

        *key* is passed on only when it is not None.
        """
        if key is not None:
            return self._element.getiterator(key)
        else:
            return self._element.getiterator()

    def find(self, key):
        """Forward to the wrapped element's find()."""
        return self._element.find(key)

    def findtext(self, key):
        """Forward to the wrapped element's findtext()."""
        return self._element.findtext(key)

    def findall(self, key):
        """Forward to the wrapped element's findall()."""
        return self._element.findall(key)
|
|
|
|
class LinkRecord(Record):
    """Record for a link element: an id plus a has-neighbor flag.

    Attributes set by the constructor:
      - PubMedId: the text of the wrapped element
      - HasNeighbor: 1 if the element's 'HasNeighbor' attribute is 'Y',
        0 otherwise (including when the attribute is absent)
    """
    _fieldsOfInterest = []

    def __init__(self, element):
        """Wrap *element* and read its id text and neighbor flag."""
        Record.__init__(self, element)
        node = self._element
        self.PubMedId = node.text
        # A missing attribute is treated as 'N'; int() turns the comparison
        # result into the 1/0 flag.
        self.HasNeighbor = int(node.get('HasNeighbor', 'N') == 'Y')
|
|
|
|
|
|
|