# This is a total hack for Conifer. If the query is a Conifer OPAC URL
# (a title-detail page or a bookbag feed), skip the catalogue search and
# return just the record(s) behind that URL.
#
# The prefix is the full OPAC host, i.e. the same prefix url_to_marcxml
# itself checks; a looser 'http://dwarf' test would hand it URLs that it
# cannot turn into MARCXML.
if query.startswith('http://dwarf.cs.uoguelph.ca'):
    # multiples=True: always get a list back, one dictionary per record.
    results = marcxml_to_dictionary(I.url_to_marcxml(query), multiples=True)
else:
    # Anything else is treated as a search against the Conifer catalogue.
    cat_host, cat_db = ('dwarf.cs.uoguelph.ca:2210', 'conifer')
    results = yaz_search.search(cat_host, cat_db, query, start, limit)
def url_to_marcxml(url):
    """Return the MARCXML behind a Conifer OPAC URL, or None.

    This is a hack. Two kinds of URL on http://dwarf.cs.uoguelph.ca are
    recognised:

    * bookbag feeds (any URL containing 'feed/bookbag'): the format
      segment after '/bookbag/' is rewritten to 'marcxml-full';
    * Title Details pages: the digits following 'r=' are used as the
      record id in a supercat 'retrieve/marcxml/record' URL.

    The derived URL is fetched with urllib2 and the response body is
    returned as read. None is returned, without any network access, when
    the URL is not on the OPAC host or no record id can be found in it.
    Errors raised by urllib2.urlopen are not caught here.
    """
    if not url.startswith('http://dwarf.cs.uoguelph.ca'):
        return None

    if 'feed/bookbag' in url:
        # eg http://dwarf.cs.uoguelph.ca/opac/extras/feed/bookbag/marcxml-full/60
        #    http://dwarf.cs.uoguelph.ca/opac/extras/feed/bookbag/html-full/60
        marc_url = re.sub(r'(.*/bookbag/)(.*?)(/.*)', r'\1marcxml-full\3', url)
    else:
        m = re.match(r'.*r=(\d+).*', url)
        if m is None:
            # No record id in the URL, so there is nothing to fetch.
            return None
        marc_url = ("http://dwarf.cs.uoguelph.ca/opac/extras"
                    "/supercat/retrieve/marcxml/record/" + m.group(1))

    response = urllib2.urlopen(marc_url)
    try:
        return response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
if __name__ == '__main__':
# Shortcut to the `replace` method of a `marctools.locToUTF8` instance;
# marcxml_to_dictionary passes every subfield's text through it.
# NOTE(review): judging by the name this converts Library of Congress
# (MARC) character escapes to UTF-8 -- confirm against marctools.
loc_to_unicode = marctools.locToUTF8().replace
def marcxml_to_dictionary(rec, multiples=False):
    """Flatten MARCXML into dictionaries keyed by tag + subfield code.

    `rec` is a MARCXML document as a string. Its root element may be a
    MARC21-slim <collection> (zero or more records) or a single
    <record>; any other root is treated as containing no records.

    Each record becomes one dictionary mapping the datafield tag joined
    with the subfield code (e.g. '245a') to the subfield's text after it
    has been passed through `loc_to_unicode`. Only <datafield> elements
    are read. If the same tag+code pair occurs more than once in a
    record, the last occurrence wins.

    If `multiples` is true, a list with one dictionary per record is
    returned (possibly empty). Otherwise only the first record's
    dictionary is returned, or None when there is no record at all.
    """
    ns = '{http://www.loc.gov/MARC21/slim}'
    tree = ElementTree.fromstring(rec)
    if tree.tag == ns + 'collection':
        # then we may have multiple records
        records = tree.findall(ns + 'record')
    elif tree.tag == ns + 'record':
        records = [tree]
    else:
        # Unrecognised root element: fall through with no records so the
        # return value still honours `multiples` ([] or None).
        records = []

    out = []
    for r in records:
        dct = {}
        for df in r.findall(ns + 'datafield'):
            t = df.attrib['tag']
            for sf in df.findall(ns + 'subfield'):
                # NOTE(review): sf.text is None for an empty <subfield/>;
                # confirm that loc_to_unicode copes with that.
                dct[t + sf.attrib['code']] = loc_to_unicode(sf.text)
        out.append(dct)

    if multiples:
        return out
    # Deliberately not `out and out[0] or None`: that idiom yields None
    # when the first record has no datafields, because {} is falsy.
    if out:
        return out[0]
    return None
def marcxml_dictionary_to_dc(dct):
"""Take a dictionary generated by marcxml_to_dictionary, and