--- /dev/null
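# Z39.50 search using PyZ3950's zoom module.
# dependencies: PyZ3950, ply (both imported below)

# NOTE: this header originally described a yaz-client/pexpect ("expect-based")
# implementation, written because pyz3950.zoom seemed wonky when testing
# against conifer z3950. The code in this file talks to the server through
# PyZ3950's zoom module, not through yaz-client.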
+\r
+import warnings\r
+import re\r
+import sys\r
+from marcxml import marcxml_to_dictionary\r
+\r
# PyZ3950 imports 'profile', 'lex' and 'yacc' as top-level modules.  When any
# of them is missing, register stand-ins in sys.modules before PyZ3950 itself
# is imported further down.
try:
    import profile
    import lex
    import yacc
except ImportError:
    # PyZ3950 only needs *something* called 'profile'; it is not actually
    # used.  setdefault() installs the stand-in only when no real 'profile'
    # module has been loaded, so a working profiler is never replaced just
    # because 'lex' or 'yacc' was the import that failed.
    sys.modules.setdefault('profile', sys)
    import ply.lex
    import ply.yacc  # pyz3950 thinks these are toplevel modules.
    sys.modules['lex'] = ply.lex
    sys.modules['yacc'] = ply.yacc
+\r
+# for Z39.50 support, not sure whether this is the way to go yet but\r
+# as generic as it gets\r
+from PyZ3950 import zoom, zmarc\r
+\r
+\r
# Timeout settings; nothing in the code visible in this file reads them.
# NOTE(review): presumably seconds, left over from the pexpect-based
# implementation -- confirm before relying on them.
GENERAL_TIMEOUT = 40
PRESENT_TIMEOUT = 60

# Debug log target for pexpect; try LOG = sys.stderr when debugging.
LOG = None
+\r
def search(host, port, database, query, start=1, limit=10):
    """Run a CCL query against a Z39.50 server and return the parsed hits.

    Parameters:
        host, port: address handed to ``zoom.Connection``.
        database: value assigned to the connection's ``databaseName``.
        query: CCL query string.
        start: 1-based position of the first record to fetch; values
            below 1 are treated as 1.
        limit: maximum number of records to fetch.  ``None`` fetches
            every record from ``start`` onwards.

    Returns:
        A ``(records, total)`` tuple.  ``records`` holds the result of
        ``marcxml_to_dictionary`` for each fetched record; ``total`` is
        the size of the whole result set, not just of the fetched page.

    Exceptions raised by the connection, the search or the MARCXML
    parser propagate to the caller.  The connection is always closed.
    """
    # zoom.Query is given a native ``str`` (as the original str(query) call
    # intended).  On Python 2 that means UTF-8 encoding a unicode query; on
    # Python 3 it means decoding a bytes query.  Native strings pass through
    # untouched, so a non-ASCII Python 2 byte string no longer blows up in an
    # implicit ASCII decode.
    if not isinstance(query, str):
        if isinstance(query, bytes):
            query = query.decode('utf-8')
        else:
            query = query.encode('utf-8')

    connection = zoom.Connection(host, port)
    try:
        connection.databaseName = database
        connection.preferredRecordSyntax = 'XML'
        results = connection.search(zoom.Query('CCL', query))
        total = len(results)

        # Work out the window [first, first + to_show) of records to fetch.
        first = max(start - 1, 0)
        to_show = max(total - first, 0)
        if limit is not None:
            to_show = min(to_show, limit)

        # Newlines are stripped so the record regex below ('.' does not
        # match '\n') can span a whole record.
        # If MARC-8 results ever have to be handled, they would need
        # translating here (zmarc.MARC8_to_Unicode).
        chunks = [str(results[index]).replace('\n', '')
                  for index in range(first, first + to_show)]
    finally:
        # Release the connection even when the search or a fetch fails.
        connection.close()

    raw = ''.join(chunks)

    parsed = []
    for rec in re.findall(r'<record .*?</record>', raw):
        # Parser errors are deliberately left to propagate: the previous
        # "except 'x': raise rec" handler used string exceptions, which
        # never caught anything useful.
        rec = _marc_utf8_pattern.sub(_decode_marc_utf8, rec)
        parsed.append(marcxml_to_dictionary(rec))
    return parsed, total
+\r
+\r
# Decoding MARC \X.. UTF-8 patterns.
#
# Each \XHH escape stands for one byte of a UTF-8 sequence, so the pattern
# captures a whole run of consecutive escapes; a multi-byte character has to
# be decoded from all of its bytes at once.

_marc_utf8_pattern = re.compile(r'((?:\\X[0-9A-F]{2})+)')

def _decode_marc_utf8(regex_match):
    """Return the text for one run of ``\\XHH`` escapes.

    ``regex_match`` is a match of ``_marc_utf8_pattern``; its group 1 is
    the complete run of escapes.

    On Python 2 (where ``str`` is ``bytes``) the raw bytes are returned,
    exactly as the per-byte ``chr()`` replacement used to produce.  On
    Python 3 the bytes are decoded as UTF-8; a run that is not valid
    UTF-8 falls back to Latin-1, which maps every byte to the code point
    of the same value (the result the per-byte ``chr()`` gave).
    """
    # Splitting on the escape marker leaves an empty first item followed by
    # the two-digit hex values.
    hex_pairs = regex_match.group(1).split('\\X')[1:]
    raw = bytes(bytearray(int(pair, 16) for pair in hex_pairs))
    if str is bytes:
        return raw
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        return raw.decode('latin-1')
+\r
+\r
#------------------------------------------------------------
# some tests

if __name__ == '__main__':
    # Each entry is ('host:port', database, query).
    tests = [
        ('zed.concat.ca:210', 'OSUL', 'chanson'),
    ]
    for target, db, query in tests:
        print('%s %s %s' % (target, db, query))
        # search() takes host and port separately; fall back to 210, the
        # registered Z39.50 port, when the target names no port.
        host, _, port = target.partition(':')
        records, total = search(host, int(port or 210), db, query, limit=33)
        # search() returns (records, total); report both rather than the
        # length of the tuple itself.
        print('%d records fetched of %d total' % (len(records), total))