From 895d646b0d992bbdfb6d27650019da0e82361377 Mon Sep 17 00:00:00 2001 From: gfawcett Date: Sat, 5 Mar 2011 18:42:33 +0000 Subject: [PATCH] uwindsor integration: LDAP fuzzy person-lookup rewritten in Python. This used to require an external executable called SpeedLookup, whose functionality is now replaced by uwindsor_fuzzy_lookup.py. git-svn-id: svn://svn.open-ils.org/ILS-Contrib/servres/trunk@1257 6d9bc8c9-1ec2-4278-b937-99fde70a366f --- conifer/integration/uwindsor.py | 56 ++++++++---------- conifer/integration/uwindsor_fuzzy_lookup.py | 87 ++++++++++++++++++++++++++++ conifer/syrup/views/sites.py | 4 +- 3 files changed, 114 insertions(+), 33 deletions(-) create mode 100755 conifer/integration/uwindsor_fuzzy_lookup.py diff --git a/conifer/integration/uwindsor.py b/conifer/integration/uwindsor.py index b1591e2..ef175fa 100644 --- a/conifer/integration/uwindsor.py +++ b/conifer/integration/uwindsor.py @@ -17,6 +17,7 @@ import re import traceback import subprocess import uwindsor_campus_info +import uwindsor_fuzzy_lookup # USE_Z3950: if True, use Z39.50 for catalogue search; if False, use OpenSRF. # Don't set this value directly here: rather, if there is a valid Z3950_CONFIG @@ -374,37 +375,30 @@ def decode_role(role): else: return 'STUDT' -FUZZY_LOOKUP_BIN = '/usr/local/bin/SpeedLookup' - -if os.path.isfile(FUZZY_LOOKUP_BIN): - - def fuzzy_person_lookup(query, include_students=False): - """ - Given a query, return a list of users who probably match the - query. The result is a list of (userid, display), where userid - is the campus userid of the person, and display is a string - suitable for display in a results-list. Include_students - indicates that students, and not just faculty/staff, should be - included in the results. - """ - - cmd = [FUZZY_LOOKUP_BIN, query] - if include_students: - cmd.append('students') - - p = subprocess.Popen(cmd, stdout=subprocess.PIPE) - try: - rdr = csv.reader(p.stdout) - rdr.next() # skip header row, - data = list(rdr) # eagerly fetch the rest - finally: - p.stdout.close() - - out = [] - for uid, sn, given, role, dept, mail in data: - display = '%s %s. %s, %s. <%s>. [%s]' % (given, sn, role, dept, mail, uid) - out.append((uid, display)) - return out +def fuzzy_person_lookup(query, include_students=False): + """ + Given a query, return a list of users who probably match the + query. The result is a list of (userid, display), where userid + is the campus userid of the person, and display is a string + suitable for display in a results-list. Include_students + indicates that students, and not just faculty/staff, should be + included in the results. + """ + # Note, our 'include_students' option only matches students on exact + # userids. That is, fuzzy matching only works for staff, faculty, and + # other non-student roles. + + filter = uwindsor_fuzzy_lookup.build_filter(query, include_students) + results = uwindsor_fuzzy_lookup.search(filter) + + out = [] + for res in results: + if not 'employeeType' in res: + res['employeeType'] = 'Student' # a 99% truth! + display = ('%(givenName)s %(sn)s. %(employeeType)s, ' + '%(uwinDepartment)s. <%(mail)s>. [%(uid)s]') % res + out.append((res['uid'], display)) + return out def derive_group_code_from_section(site, section): diff --git a/conifer/integration/uwindsor_fuzzy_lookup.py b/conifer/integration/uwindsor_fuzzy_lookup.py new file mode 100755 index 0000000..4b804a0 --- /dev/null +++ b/conifer/integration/uwindsor_fuzzy_lookup.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +"A rewrite of SpeedLookup in Python." + +import ldap +import csv, sys + +MIN_QUERY_LENGTH = 3 + +BASE = "ou=people,dc=uwindsor,dc=ca" +SCOPE = ldap.SCOPE_ONELEVEL +ATTRS = ["uid", "sn", "eduPersonNickname", "employeeType", + "uwinDepartment", "mail", "givenName"] + +# fetch server connection details from /etc/ldap-agent + +tmp = [line.strip() for line in file('/etc/ldap-agent')] +SERVER, USER, PWD = tmp[:3] + +# --------------------------------------------------------------------------- +# filter construction + +def _or(*parts): + return '|%s' % ''.join(['(%s)' % p for p in parts]) + +def _and(*parts): + return '&%s' % ''.join(['(%s)' % p for p in parts]) + +def build_filter(query, include_students=False): + if len(query) < MIN_QUERY_LENGTH: + return None + else: + query = query.lower() + pattern = _and(_or('uid=%(query)s*', + 'sn=%(query)s*', + 'givenName=%(query)s*'), + _or(*['employeeType=%s' % x for x in + ('Faculty', 'Administration', + 'Staff', 'Librarian', + 'Academic*')])) + if include_students: + # we only match students by uid. + pattern = _or('uid=%(query)s', pattern) + return '(%s)' % (pattern % locals()) + +# --------------------------------------------------------------------------- +# LDAP interaction + +def search(filt): + if not filt: + return [] + + conn = ldap.open(SERVER) + conn.simple_bind_s(USER, PWD) + results = conn.search_s(BASE, SCOPE, filt, ATTRS) + + for (cn, dct) in results: + dct['cn'] = cn + if dct.get('eduPersonNickname'): + dct['givenName'] = dct['eduPersonNickname'] + for attr in dct: + dct[attr] = dct[attr][0] + + dicts = [dct for cn,dct in results] + dicts.sort(key=lambda dct: (dct['sn'], dct['givenName'])) + conn.unbind_s() + return dicts + +# --------------------------------------------------------------------------- +# main + +if __name__ == '__main__': + # the headings to print, and their corresponding ldap attributes. + HEADINGS = ["uid", "surname", "given", "type", "department", "email"] + MAPPING = ["uid", "sn", "givenName", "employeeType", "uwinDepartment", "mail"] + + out = csv.writer(sys.stdout) + query = sys.argv[1] + students = 'students' in sys.argv + filt = build_filter(query, include_students=students) + results = search(filt) + + # print them + out.writerow(HEADINGS) + for dct in results: + row = [dct.get(key,'') for key in MAPPING] + out.writerow(row) diff --git a/conifer/syrup/views/sites.py b/conifer/syrup/views/sites.py index 4d9c63b..7af9c35 100644 --- a/conifer/syrup/views/sites.py +++ b/conifer/syrup/views/sites.py @@ -214,8 +214,8 @@ def site_join(request, site_id): @admin_only def site_fuzzy_user_lookup(request): - query = request.POST.get('q').lower().strip() - include_students = (request.POST.get('includeStudents') == 'true') + query = request.REQUEST.get('q').lower().strip() + include_students = (request.REQUEST.get('includeStudents') == 'true') results = callhook('fuzzy_person_lookup', query, include_students) or [] limit = 10 resp = {'results': results[:limit], -- 2.11.0