From: gfawcett Date: Wed, 6 May 2009 00:01:08 +0000 (+0000) Subject: Cleaned up lib integration. Moved more lib settings to settings.py. Better UTF-8... X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=cb5f201da6f6bede9f65fd5499ec8a62cb3db698;p=Syrup.git Cleaned up lib integration. Moved more lib settings to settings.py. Better UTF-8 support in z3950 search. Still not perfect, but cleaner. Hard-coded references to EG server are now in settings.py, as are the names of other servers (Z39.50, SIP). I had been using a file called 'marctools' by Joel Hardi to do what turned out to be basic UTF-8 encoding, so I've replaced his code with a simple regex-based substitution (in yaz_search.py). git-svn-id: svn://svn.open-ils.org/ILS-Contrib/servres/trunk@454 6d9bc8c9-1ec2-4278-b937-99fde70a366f --- diff --git a/conifer/custom/auth_evergreen.py b/conifer/custom/auth_evergreen.py index f7c361e..f40ed48 100644 --- a/conifer/custom/auth_evergreen.py +++ b/conifer/custom/auth_evergreen.py @@ -5,8 +5,10 @@ from django.conf import settings class EvergreenAuthBackend(EvergreenAuthServer): def __init__(self): + assert settings.EVERGREEN_GATEWAY_SERVER, \ + 'EvergreenAuthBackend requires settings.EVERGREEN_GATEWAY_SERVER' EvergreenAuthServer.__init__( - self, settings.EVERGREEN_XMLRPC_SERVER) + self, settings.EVERGREEN_GATEWAY_SERVER) def authenticate(self, username=None, password=None): pwd_valid = self.login(username, password) diff --git a/conifer/custom/lib_integration.py b/conifer/custom/lib_integration.py index b04bb9a..3de2070 100644 --- a/conifer/custom/lib_integration.py +++ b/conifer/custom/lib_integration.py @@ -12,7 +12,6 @@ # SIP for patron and item_info, and for item checkout and checkin, # OpenSRF for extended item info. - # define a @caching decorator to exploit the Django cache. Fixme, move # this somewhere else. from django.core.cache import cache @@ -38,8 +37,10 @@ def caching(prefix, timeout=60): from django.conf import settings -#LIBINT = settings.LIBRARY_INTEGRATION # more on this later. +from conifer.libsystems.evergreen.support import initialize +EG_BASE = 'http://%s/' % settings.EVERGREEN_GATEWAY_SERVER +initialize(EG_BASE) from conifer.libsystems.evergreen import item_status as I from conifer.libsystems.sip.sipclient import SIP @@ -82,9 +83,9 @@ def bib_id_to_marcxml(bib_id): def cat_search(query, start=1, limit=10): # this is a total hack for conifer. If the query is a Conifer # title-detail URL, then return just that one item. - if query.startswith('http://concat'): + if query.startswith(EG_BASE): results = marcxml_to_dictionary(I.url_to_marcxml(query), multiples=True) else: - cat_host, cat_db = ('zed.concat.ca:210', 'OWA') + cat_host, cat_db = settings.Z3950_CONFIG results = yaz_search.search(cat_host, cat_db, query, start, limit) return results diff --git a/conifer/libsystems/evergreen/item_status.py b/conifer/libsystems/evergreen/item_status.py index 709d11e..faa310c 100644 --- a/conifer/libsystems/evergreen/item_status.py +++ b/conifer/libsystems/evergreen/item_status.py @@ -1,3 +1,4 @@ +import support from support import ER, E1 import re import urllib2 @@ -14,22 +15,21 @@ def bib_id_to_marcxml(bib_id): def url_to_marcxml(url): # this is a hack. Given a opac Title Details url, return marcxml. - if url.startswith('http://concat.ca'): + assert support.BASE, 'no EG BASE. Did you call support.initialize()?' + if url.startswith(support.BASE): if 'feed/bookbag' in url: #eg http://concat.ca/opac/extras/feed/bookbag/marcxml-full/60 - #http://concat.ca/opac/extras/feed/bookbag/html-full/60 marc_url = re.sub(r'(.*/bookbag/)(.*?)(/.*)', r'\1marcxml-full\3', url) xml = urllib2.urlopen(marc_url).read() else: m = re.match(r'.*r=(\d+).*', url) item_id = m and m.group(1) or None if item_id: - marc_url = ("http://concat.ca/opac/extras" - "/supercat/retrieve/marcxml/record/" + item_id) + marc_url = ("%s/opac/extras/supercat/" + "retrieve/marcxml/record/%s" % (support.BASE, item_id)) xml = urllib2.urlopen(marc_url).read() return xml if __name__ == '__main__': -# from pprint import pprint -# print bib_id_to_marcxml(barcode_to_bib_id(31862016799294)) - print url_to_marcxml('http://concat.ca/opac/en-US/skin/default/xml/rdetail.xml?r=1082665&t=dylan%20thomas%20ralph&tp=keyword&d=0&hc=14&rt=keyword') + support.initialize('http://www.concat.ca/') + print url_to_marcxml('http://www.concat.ca/opac/en-US/skin/default/xml/rdetail.xml?r=1082665&t=dylan%20thomas%20ralph&tp=keyword&d=0&hc=14&rt=keyword') diff --git a/conifer/libsystems/evergreen/support.py b/conifer/libsystems/evergreen/support.py index 7e8c753..cda0dc5 100644 --- a/conifer/libsystems/evergreen/support.py +++ b/conifer/libsystems/evergreen/support.py @@ -6,32 +6,24 @@ from xml.etree import ElementTree import re import sys, os -#------------------------------------------------------------ -# Configuration - -# where is our evergreen server's opensrf http gateway? - -BASE = 'http://concat.ca/osrf-gateway-v1' LOCALE = 'en-US' -# where can I find a copy of fm_IDL.xml from Evergreen? - -# # This will work always, though maybe you want to up the rev number... -# FM_IDL_LOCATION = ('http://svn.open-ils.org/trac/ILS/export/12640' -# '/trunk/Open-ILS/examples/fm_IDL.xml') - -# # or, if you have a local copy... -# FM_IDL_LOCATION = 'file:fm_IDL.xml' - -FM_IDL_LOCATION = 'http://concat.ca/reports/fm_IDL.xml' -here = lambda s: os.path.join(os.path.dirname(__file__), s) -FM_IDL_LOCATION = 'file:' + here('fm_IDL.xml') - #------------------------------------------------------------ # parse fm_IDL, to build a field-name-lookup service. +fields_for_class = {} +BASE = None + +def initialize(base): + global BASE + if not BASE: + assert base.endswith('/') + BASE = base + fields_for_class.update(dict(_fields())) + def _fields(): - tree = ElementTree.parse(urllib2.urlopen(FM_IDL_LOCATION)) + fm_IDL_location = BASE + 'reports/fm_IDL.xml' + tree = ElementTree.parse(urllib2.urlopen(fm_IDL_location)) NS = '{http://opensrf.org/spec/IDL/base/v1}' for c in tree.findall('%sclass' % NS): cid = c.attrib['id'] @@ -39,7 +31,6 @@ def _fields(): for f in c.findall('%sfields/%sfield' % (NS,NS))] yield (cid, fields) -fields_for_class = dict(_fields()) #------------------------------------------------------------ @@ -62,7 +53,7 @@ def evergreen_request(method, *args, **kwargs): kwargs.update({'service':service, 'method':method}) params = ['%s=%s' % (k,quote(v)) for k,v in kwargs.items()] params += ['param=%s' % quote(str(a)) for a in args] - url = '%s?%s' % (BASE, '&'.join(params)) + url = '%sosrf-gateway-v1?%s' % (BASE, '&'.join(params)) req = urllib2.urlopen(url) resp = json.load(req) assert resp['status'] == 200, 'error during evergreen request' diff --git a/conifer/libsystems/z3950/marctools.py b/conifer/libsystems/z3950/marctools.py deleted file mode 100644 index a06f14c..0000000 --- a/conifer/libsystems/z3950/marctools.py +++ /dev/null @@ -1,114 +0,0 @@ -""" - MARC utlities - Public Domain 2007 public.resource.org - - Author: Joel Hardi -""" - -class locToUTF8(object): - "Changes text from LOC into unicode, using replace() method" - - dict = {} - charmap = {} - - def __init__(self): - "Sets self.dict and search character index self.charmap" - self.dict = { - "\X20":"\u0020", # "HARD SPACE - represented by a space" - "\XC2\XA1":"\u00A1", # "INVERTED EXCLAMATION MARK" - "\XC2\XA3":"\u00A3", # "BRITISH POUND / POUND SIGN" - "\XC2\XA9":"\u00A9", # "COPYRIGHT SIGN" - "\XC2\XAE":"\u00AE", # "PATENT MARK / REGISTERED SIGN" - "\XC2\XB0":"\u00B0", # "DEGREE SIGN" - "\XC2\XB1":"\u00B1", # "PLUS OR MINUS / PLUS-MINUS SIGN" - "\XC2\XB7":"\u00B7", # "MIDDLE DOT" - "\XC2\XBF":"\u00BF", # "INVERTED QUESTION MARK" - "\XC3\X86":"\u00C6", # "UPPERCASE DIGRAPH AE / LATIN CAPITAL LIGATURE AE" - "\XC3\X98":"\u00D8", # "UPPERCASE SCANDINAVIAN O / LATIN CAPITAL LETTER O WITH STROKE" - "\XC3\X9E":"\u00DE", # "UPPERCASE ICELANDIC THORN / LATIN CAPITAL LETTER THORN (Icelandic)" - "\XC3\XA6":"\u00E6", # "LOWERCASE DIGRAPH AE / LATIN SMALL LIGATURE AE" - "\XC3\XB0":"\u00F0", # "LOWERCASE ETH / LATIN SMALL LETTER ETH (Icelandic)" - "\XC3\XB8":"\u00F8", # "LOWERCASE SCANDINAVIAN O / LATIN SMALL LETTER O WITH STROKE" - "\XC3\XBE":"\u00FE", # "LOWERCASE ICELANDIC THORN / LATIN SMALL LETTER THORN (Icelandic)" - "\XC4\X90":"\u0110", # "UPPERCASE D WITH CROSSBAR / LATIN CAPITAL LETTER D WITH STROKE" - "\XC4\X91":"\u0111", # "LOWERCASE D WITH CROSSBAR / LATIN SMALL LETTER D WITH STROKE" - "\XC4\XB1":"\u0131", # "LOWERCASE TURKISH I / LATIN SMALL LETTER DOTLESS I" - "\XC5\X81":"\u0141", # "UPPERCASE POLISH L / LATIN CAPITAL LETTER L WITH STROKE" - "\XC5\X82":"\u0142", # "LOWERCASE POLISH L / LATIN SMALL LETTER L WITH STROKE" - "\XC5\X92":"\u0152", # "UPPERCASE DIGRAPH OE / LATIN CAPITAL LIGATURE OE" - "\XC5\X93":"\u0153", # "LOWERCASE DIGRAPH OE / LATIN SMALL LIGATURE OE" - "\XC6\XA0":"\u01A0", # "UPPERCASE O-HOOK / LATIN CAPITAL LETTER O WITH HORN" - "\XC6\XA1":"\u01A1", # "LOWERCASE O-HOOK / LATIN SMALL LETTER O WITH HORN" - "\XC6\XAF":"\u01AF", # "UPPERCASE U-HOOK / LATIN CAPITAL LETTER U WITH HORN" - "\XC6\XB0":"\u01B0", # "LOWERCASE U-HOOK / LATIN SMALL LETTER U WITH HORN" - "\XCA\XB9":"\u02B9", # "SOFT SIGN, PRIME / MODIFIER LETTER PRIME" - "\XCA\XBA":"\u02BA", # "HARD SIGN, DOUBLE PRIME / MODIFIER LETTER DOUBLE PRIME" - "\XCA\XBB":"\u02BB", # "AYN / MODIFIER LETTER TURNED COMMA" - "\XCA\XBE":"\u02BE", # "ALIF / MODIFIER LETTER RIGHT HALF RING" - "\XCC\X80":"\u0300", # "GRAVE / COMBINING GRAVE ACCENT (Varia)" - "\XCC\X81":"\u0301", # "ACUTE / COMBINING ACUTE ACCENT (Oxia)" - "\XCC\X82":"\u0302", # "CIRCUMFLEX / COMBINING CIRCUMFLEX ACCENT" - "\XCC\X83":"\u0303", # "TILDE / COMBINING TILDE" - "\XCC\X84":"\u0304", # "MACRON / COMBINING MACRON" - "\XCC\X86":"\u0306", # "BREVE / COMBINING BREVE (Vrachy)" - "\XCC\X87":"\u0307", # "SUPERIOR DOT / COMBINING DOT ABOVE" - "\XCC\X88":"\u0308", # "UMLAUT, DIAERESIS / COMBINING DIAERESIS (Dialytika)" - "\XCC\X89":"\u0309", # "PSEUDO QUESTION MARK / COMBINING HOOK ABOVE" - "\XCC\X8A":"\u030A", # "CIRCLE ABOVE, ANGSTROM / COMBINING RING ABOVE" - "\XCC\X8B":"\u030B", # "DOUBLE ACUTE / COMBINING DOUBLE ACUTE ACCENT" - "\XCC\X8C":"\u030C", # "HACEK / COMBINING CARON" - "\XCC\X90":"\u0310", # "CANDRABINDU / COMBINING CANDRABINDU" - "\XCC\X93":"\u0313", # "HIGH COMMA, CENTERED / COMBINING COMMA ABOVE (Psili)" - "\XCC\X95":"\u0315", # "HIGH COMMA, OFF CENTER / COMBINING COMMA ABOVE RIGHT" - "\XCC\X9C":"\u031C", # "RIGHT CEDILLA / COMBINING LEFT HALF RING BELOW" - "\XCC\XA3":"\u0323", # "DOT BELOW / COMBINING DOT BELOW" - "\XCC\XA4":"\u0324", # "DOUBLE DOT BELOW / COMBINING DIAERESIS BELOW" - "\XCC\XA5":"\u0325", # "CIRCLE BELOW / COMBINING RING BELOW" - "\XCC\XA6":"\u0326", # "LEFT HOOK (COMMA BELOW) / COMBINING COMMA BELOW" - "\XCC\XA7":"\u0327", # "CEDILLA / COMBINING CEDILLA" - "\XCC\XA8":"\u0328", # "RIGHT HOOK, OGONEK / COMBINING OGONEK" - "\XCC\XAE":"\u032E", # "UPADHMANIYA / COMBINING BREVE BELOW" - "\XCC\XB2":"\u0332", # "UNDERSCORE / COMBINING LOW LINE" - "\XCC\XB3":"\u0333", # "DOUBLE UNDERSCORE / COMBINING DOUBLE LOW LINE" - "\XE2\X84\X93":"\u2113", # "SCRIPT SMALL L" - "\XE2\X84\X97":"\u2117", # "SOUND RECORDING COPYRIGHT" - "\XE2\X99\XAD":"\u266D", # "MUSIC FLAT SIGN" - "\XE2\X99\XAF":"\u266F", # "MUSIC SHARP SIGN" - "\XEF\XB8\XA0":"\uFE20", # "LIGATURE, FIRST HALF / COMBINING LIGATURE LEFT HALF" - "\XEF\XB8\XA1":"\uFE21", # "LIGATURE, SECOND HALF / COMBINING LIGATURE RIGHT HALF" - "\XEF\XB8\XA2":"\uFE22", # "DOUBLE TILDE, FIRST HALF / COMBINING DOUBLE TILDE LEFT HALF" - "\XEF\XB8\XA3":"\uFE23", # "DOUBLE TILDE, SECOND HALF / COMBINING DOUBLE TILDE RIGHT HALF" - } - - # build self.charmap to map each first char of a search string to a list of its search strings - firstchars = [] - self.charmap = {} - for i in self.dict.iterkeys(): - if firstchars.count(i[0]) == 0: - firstchars.append(i[0]) - self.charmap[i[0]] = [] - self.charmap[i[0]].append(i) - - def replace(self, str): - "Given string str, returns unicode string with correct character replcements" - if isinstance(str, unicode): # added by Graham - return str - searchchars = [] - # build subset of search/replace pairs to use based on if first char of search appears in str - prev = range(0,3) - for c in str: - prev[0] = prev[1] - prev[1] = prev[2] - prev[2] = c - if self.charmap.has_key(c): - if searchchars.count(c) == 0: - searchchars.append(c) - elif ord(c) > 127 and prev.count(c) == 0: - str = str.replace(c, '\\X%x' % ord(c)) - - # perform search/replaces - for c in searchchars: - for i in self.charmap[c]: - str = str.replace(i, self.dict[i]) - - return unicode(str, 'raw-unicode-escape') diff --git a/conifer/libsystems/z3950/marcxml.py b/conifer/libsystems/z3950/marcxml.py index 741556a..6e9930d 100644 --- a/conifer/libsystems/z3950/marcxml.py +++ b/conifer/libsystems/z3950/marcxml.py @@ -1,7 +1,4 @@ from xml.etree import ElementTree -import marctools - -loc_to_unicode = marctools.locToUTF8().replace def marcxml_to_dictionary(rec, multiples=False): tree = ElementTree.fromstring(rec) @@ -19,8 +16,9 @@ def marcxml_to_dictionary(rec, multiples=False): t = df.attrib['tag'] for sf in df.findall('{http://www.loc.gov/MARC21/slim}subfield'): c = sf.attrib['code'] - v = sf.text - dct[t+c] = loc_to_unicode(v) + v = sf.text or '' + dct.setdefault(t+c, []).append(v) + dct = dict((k,'\n'.join(v or [])) for k,v in dct.items()) out.append(dct) if multiples is False: return out and out[0] or None @@ -44,10 +42,11 @@ def marcxml_dictionary_to_dc(dct): title += (' %s' % dct['245b']) # if title ends with a single character, strip it. usually a # spurious punctuation character. - init, last = title.rsplit(' ',1) - if len(last) == 1: - title = init - out['dc:title'] = title + if ' ' in title: + init, last = title.rsplit(' ',1) + if len(last) == 1: + title = init + out['dc:title'] = title return out diff --git a/conifer/libsystems/z3950/yaz_search.py b/conifer/libsystems/z3950/yaz_search.py index 35a5c7e..fcd2767 100644 --- a/conifer/libsystems/z3950/yaz_search.py +++ b/conifer/libsystems/z3950/yaz_search.py @@ -10,12 +10,12 @@ import pexpect import sys from marcxml import marcxml_to_dictionary -LOG = sys.stderr #None # for pexpect debugging, try LOG = sys.stderr +LOG = None # for pexpect debugging, try LOG = sys.stderr YAZ_CLIENT = 'yaz-client' GENERAL_TIMEOUT = 40 PRESENT_TIMEOUT = 60 -def search(host, database, query, start=1, limit=None): +def search(host, database, query, start=1, limit=10): # first, let's look at our query. I'm assuming @prefix queries for # now, so we need to put queries in that form if they aren't @@ -46,7 +46,7 @@ def search(host, database, query, start=1, limit=None): return [] # how many to present? At most 10 for now. - to_show = min(numhits-1, 10) # minus 1 for dwarf ?? + to_show = min(numhits-1, limit) if limit: to_show = min(to_show, limit) server.expect('Z>') @@ -60,9 +60,11 @@ def search(host, database, query, start=1, limit=None): raw_records = [] err = None + server.expect('.*Record type: XML') server.expect('nextResultSetPosition') pat = re.compile('', re.M) - raw_records = pat.findall(server.before) + raw = server.before.replace('\n','') + raw_records = pat.findall(raw) server.expect('Z>') server.sendline('quit') server.close() @@ -70,21 +72,30 @@ def search(host, database, query, start=1, limit=None): parsed = [] for rec in raw_records: try: + rec = _marc_utf8_pattern.sub(_decode_marc_utf8, rec) + print type(rec) dct = marcxml_to_dictionary(rec) - except: + except 'x': raise rec parsed.append(dct) return parsed +# decoding MARC \X.. UTF-8 patterns. + +_marc_utf8_pattern = re.compile(r'\\X([0-9A-F]{2})') + +def _decode_marc_utf8(regex_match): + return chr(int(regex_match.group(1), 16)) + + #------------------------------------------------------------ # some tests if __name__ == '__main__': tests = [ - ('concat.ca:2210', 'conifer', '@and "Musson" "Evil"'), - ('concat.ca:2210', 'conifer', '@and "Denis" "Gravel"'), - ('z3950.loc.gov:7090', 'VOYAGER', '@attr 1=4 @attr 4=1 "dylan"')] + ('zed.concat.ca:210', 'OSUL', 'chanson'), + ] for host, db, query in tests: print (host, db, query) - print search(host, db, query, limit=1) + print len(search(host, db, query, limit=33)) diff --git a/conifer/settings.py b/conifer/settings.py index a1a0f89..dc13638 100644 --- a/conifer/settings.py +++ b/conifer/settings.py @@ -1,8 +1,5 @@ # Django settings for conifer project. -# make sure you have a local_settings.py file! Copy from -# local_settings.py.in and customize that file. - import os os.environ['PYTHON_EGG_CACHE'] = '/tmp/eggs' @@ -86,11 +83,7 @@ MIDDLEWARE_CLASSES = ( ROOT_URLCONF = 'conifer.urls' -TEMPLATE_DIRS = ( - # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". - # Always use forward slashes, even on Windows. - # Don't forget to use absolute paths, not relative paths. -) +TEMPLATE_DIRS = [] INSTALLED_APPS = ( 'django.contrib.auth', @@ -106,32 +99,23 @@ AUTH_PROFILE_MODULE = 'syrup.UserProfile' AUTHENTICATION_BACKENDS = ( 'django.contrib.auth.backends.ModelBackend', + # uncomment for EG authentication: + #'conifer.custom.auth_evergreen.EvergreenAuthBackend', ) -# more on this later. -LIBRARY_INTEGRATION = { - 'patron_info': 'SIP', - 'item_status': 'SIP', - 'item_info' : 'OpenSRF', - 'catalogue' : 'Z39.50', -} - -EVERGREEN_XMLRPC_SERVER = None # evergreen host, for auth, e.g. '192.168.1.10' - -if EVERGREEN_XMLRPC_SERVER: - AUTHENTICATION_BACKENDS.append( - 'conifer.custom.auth_evergreen.EvergreenAuthBackend') +EVERGREEN_GATEWAY_SERVER = 'www.concat.ca' +Z3950_CONFIG = ('zed.concat.ca:210', 'OWA') #OWA,OSUL,CONIFER +SIP_HOST = ('dwarf.cs.uoguelph.ca', 8080) try: - # Graham has this right now; it's not official Syrup. Nothing to see here. - from private_local_settings import SIP_HOST, SIP_CREDENTIALS + from private_local_settings import SIP_CREDENTIALS except: # stuff that I really ought not check into svn... - #SIP_HOST = ('hostname', 9999) #SIP_CREDENTIALS = ('userid', 'password', 'location') pass + #CACHE_BACKEND = 'memcached://127.0.0.1:11211/' #CACHE_BACKEND = 'db://test_cache_table' #CACHE_BACKEND = 'locmem:///' diff --git a/conifer/syrup/views/items.py b/conifer/syrup/views/items.py index b0f9367..0ffb38c 100644 --- a/conifer/syrup/views/items.py +++ b/conifer/syrup/views/items.py @@ -174,7 +174,8 @@ def item_add_cat_search(request, course_id, item_id): if not raw_pickitem: # process the query. assert query, 'must provide a query.' - results = lib_integration.cat_search(query) + start, limit = (1, 20) + results = lib_integration.cat_search(query, start, limit) return g.render('item/item_add_cat_search.xhtml', results=results, query=query, course=course, parent_item=parent_item)