uwindsor integration: LDAP fuzzy person-lookup rewritten in Python.
author gfawcett <gfawcett@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Sat, 5 Mar 2011 18:42:33 +0000 (18:42 +0000)
committer gfawcett <gfawcett@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Sat, 5 Mar 2011 18:42:33 +0000 (18:42 +0000)
This used to require an external executable called SpeedLookup, whose
functionality is now replaced by uwindsor_fuzzy_lookup.py.

git-svn-id: svn://svn.open-ils.org/ILS-Contrib/servres/trunk@1257 6d9bc8c9-1ec2-4278-b937-99fde70a366f

conifer/integration/uwindsor.py
conifer/integration/uwindsor_fuzzy_lookup.py [new file with mode: 0755]
conifer/syrup/views/sites.py

diff --git a/conifer/integration/uwindsor.py b/conifer/integration/uwindsor.py
index b1591e2..ef175fa 100644 (file)
@@ -17,6 +17,7 @@ import re
 import traceback
 import subprocess
 import uwindsor_campus_info
+import uwindsor_fuzzy_lookup
 
 # USE_Z3950: if True, use Z39.50 for catalogue search; if False, use OpenSRF.
 # Don't set this value directly here: rather, if there is a valid Z3950_CONFIG
@@ -374,37 +375,30 @@ def decode_role(role):
     else:
         return 'STUDT'
 
-FUZZY_LOOKUP_BIN = '/usr/local/bin/SpeedLookup'
-
-if os.path.isfile(FUZZY_LOOKUP_BIN):
-
-    def fuzzy_person_lookup(query, include_students=False):
-        """
-        Given a query, return a list of users who probably match the
-        query. The result is a list of (userid, display), where userid
-        is the campus userid of the person, and display is a string
-        suitable for display in a results-list. Include_students
-        indicates that students, and not just faculty/staff, should be
-        included in the results.
-        """
-
-        cmd = [FUZZY_LOOKUP_BIN, query]
-        if include_students:
-            cmd.append('students')
-
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
-        try:
-            rdr = csv.reader(p.stdout)
-            rdr.next()              # skip header row,
-            data = list(rdr)        # eagerly fetch the rest
-        finally:
-            p.stdout.close()
-
-        out = []
-        for uid, sn, given, role, dept, mail in data:
-            display = '%s %s. %s, %s. <%s>. [%s]' % (given, sn, role, dept, mail, uid)
-            out.append((uid, display))
-        return out
+def fuzzy_person_lookup(query, include_students=False):
+    """
+    Given a query, return a list of users who probably match the
+    query. The result is a list of (userid, display), where userid
+    is the campus userid of the person, and display is a string
+    suitable for display in a results-list. Include_students
+    indicates that students, and not just faculty/staff, should be
+    included in the results.
+    """
+    # Note, our 'include_students' option only matches students on exact
+    # userids. That is, fuzzy matching only works for staff, faculty, and
+    # other non-student roles.
+
+    filter  = uwindsor_fuzzy_lookup.build_filter(query, include_students)
+    results = uwindsor_fuzzy_lookup.search(filter)
+
+    out = []
+    for res in results:
+        if not 'employeeType' in res:
+            res['employeeType'] = 'Student' # a 99% truth!
+        display = ('%(givenName)s %(sn)s. %(employeeType)s, '
+                   '%(uwinDepartment)s. <%(mail)s>. [%(uid)s]') % res
+        out.append((res['uid'], display))
+    return out
 
 
 def derive_group_code_from_section(site, section):
diff --git a/conifer/integration/uwindsor_fuzzy_lookup.py b/conifer/integration/uwindsor_fuzzy_lookup.py
new file mode 100755 (executable)
index 0000000..4b804a0
--- /dev/null
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+"A rewrite of SpeedLookup in Python."
+
+import ldap
+import csv, sys
+
+MIN_QUERY_LENGTH = 3
+
+BASE  = "ou=people,dc=uwindsor,dc=ca"
+SCOPE = ldap.SCOPE_ONELEVEL
+ATTRS = ["uid", "sn", "eduPersonNickname", "employeeType",
+         "uwinDepartment", "mail", "givenName"]
+
+# fetch server connection details from /etc/ldap-agent
+
+tmp = [line.strip() for line in file('/etc/ldap-agent')]
+SERVER, USER, PWD = tmp[:3]
+
+# ---------------------------------------------------------------------------
+# filter construction
+
+def _or(*parts):
+    return '|%s' % ''.join(['(%s)' % p for p in parts])
+
+def _and(*parts):
+    return '&%s' % ''.join(['(%s)' % p for p in parts])
+
+def build_filter(query, include_students=False):
+    if len(query) < MIN_QUERY_LENGTH:
+        return None
+    else:
+        query = query.lower()
+        pattern = _and(_or('uid=%(query)s*',
+                           'sn=%(query)s*',
+                           'givenName=%(query)s*'),
+                       _or(*['employeeType=%s' % x for x in
+                             ('Faculty', 'Administration',
+                              'Staff', 'Librarian', 
+                              'Academic*')]))
+        if include_students:
+            # we only match students by uid.
+            pattern = _or('uid=%(query)s', pattern)
+        return '(%s)' % (pattern % locals())
+
+# ---------------------------------------------------------------------------
+# LDAP interaction
+
+def search(filt):
+    if not filt:
+        return []
+
+    conn = ldap.open(SERVER)
+    conn.simple_bind_s(USER, PWD)
+    results = conn.search_s(BASE, SCOPE, filt, ATTRS)
+
+    for (cn, dct) in results:
+        dct['cn'] = cn
+        if dct.get('eduPersonNickname'):
+            dct['givenName'] = dct['eduPersonNickname']
+        for attr in dct:
+            dct[attr] = dct[attr][0]
+    
+    dicts = [dct for cn,dct in results]
+    dicts.sort(key=lambda dct: (dct['sn'], dct['givenName']))
+    conn.unbind_s()
+    return dicts
+
+# ---------------------------------------------------------------------------
+# main
+
+if __name__ == '__main__':
+    # the headings to print, and their corresponding ldap attributes.
+    HEADINGS = ["uid", "surname", "given", "type", "department", "email"]
+    MAPPING  = ["uid", "sn", "givenName", "employeeType", "uwinDepartment", "mail"]
+
+    out      = csv.writer(sys.stdout)
+    query    = sys.argv[1]
+    students = 'students' in sys.argv
+    filt     = build_filter(query, include_students=students)
+    results  = search(filt)
+
+    # print them
+    out.writerow(HEADINGS)
+    for dct in results:
+        row = [dct.get(key,'') for key in MAPPING]
+        out.writerow(row)
diff --git a/conifer/syrup/views/sites.py b/conifer/syrup/views/sites.py
index 4d9c63b..7af9c35 100644 (file)
@@ -214,8 +214,8 @@ def site_join(request, site_id):
 
 @admin_only
 def site_fuzzy_user_lookup(request):
-    query = request.POST.get('q').lower().strip()
-    include_students = (request.POST.get('includeStudents') == 'true')
+    query = request.REQUEST.get('q').lower().strip()
+    include_students = (request.REQUEST.get('includeStudents') == 'true')
     results = callhook('fuzzy_person_lookup', query, include_students) or []
     limit = 10
     resp = {'results': results[:limit],