From: artunit Date: Mon, 17 Aug 2009 05:05:04 +0000 (+0000) Subject: whoops, add missing z3950 setup X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=7c07b72ddda2e58a4109f4c4309ace2c7f90101d;p=Syrup.git whoops, add missing z3950 setup git-svn-id: svn://svn.open-ils.org/ILS-Contrib/servres/trunk@620 6d9bc8c9-1ec2-4278-b937-99fde70a366f --- diff --git a/conifer/libsystems/z3950/pyz3950_search.py b/conifer/libsystems/z3950/pyz3950_search.py new file mode 100644 index 0000000..bd2bfc3 --- /dev/null +++ b/conifer/libsystems/z3950/pyz3950_search.py @@ -0,0 +1,101 @@ +# z39.50 search using yaz-client. +# dependencies: yaz-client, pexpect + +# I found that pyz3950.zoom seemed wonky when testing against conifer +# z3950, so I whipped up this expect-based version instead. + +import warnings +import re +import sys +from marcxml import marcxml_to_dictionary + +try: + + import profile + import lex + import yacc +except ImportError: + + sys.modules['profile'] = sys # just get something called 'profile'; + # it's not actually used. + import ply.lex + import ply.yacc # pyz3950 thinks these are toplevel modules. + sys.modules['lex'] = ply.lex + sys.modules['yacc'] = ply.yacc + +# for Z39.50 support, not sure whether this is the way to go yet but +# as generic as it gets +from PyZ3950 import zoom, zmarc + + +LOG = None # for pexpect debugging, try LOG = sys.stderr +GENERAL_TIMEOUT = 40 +PRESENT_TIMEOUT = 60 + +def search(host, port, database, query, start=1, limit=10): + + + query = query.encode('utf-8') # is this okay? Is it enough?? + + conn = zoom.Connection(host, port) + conn.databaseName = database + conn.preferredRecordSyntax = 'XML' + + query = zoom.Query ('CCL', str(query)) + res = conn.search (query) + collector = [] + #if we were dealing with marc8 results, would probably need this + #m = zmarc.MARC8_to_Unicode () + + # how many to present? At most 10 for now. + to_show = min(len(res)-(start - 1), limit) + if limit: + to_show = min(to_show, limit) + + + #this seems to an efficient way of snagging the records + #would be good to cache the result set for iterative display + for r in range(start - 1,(start-1) + to_show): + #would need to translate marc8 records, evergreen doesn't need this + #collector.append(m.translate(r.data)) + collector.append(str(res.__getitem__(r)).replace('\n','')) + conn.close () + + + raw = "" . join(collector) + + raw_records = [] + err = None + + pat = re.compile('', re.M) + raw_records = pat.findall(raw) + + parsed = [] + for rec in raw_records: + try: + rec = _marc_utf8_pattern.sub(_decode_marc_utf8, rec) + dct = marcxml_to_dictionary(rec) + except 'x': + raise rec + parsed.append(dct) + return parsed, len(res) + + +# decoding MARC \X.. UTF-8 patterns. + +_marc_utf8_pattern = re.compile(r'\\X([0-9A-F]{2})') + +def _decode_marc_utf8(regex_match): + return chr(int(regex_match.group(1), 16)) + + +#------------------------------------------------------------ +# some tests + +if __name__ == '__main__': + tests = [ + ('zed.concat.ca:210', 'OSUL', 'chanson'), + ] + for host, db, query in tests: + print (host, db, query) + print len(search(host, db, query, limit=33))