Towards a periodic OpenSRF-speaking LDAP patron sync script
author: Dan Scott <dan@coffeecode.net>
Tue, 6 Sep 2011 20:16:51 +0000 (16:16 -0400)
committer: Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:38:25 +0000 (14:38 -0400)
Still need to factor out the mapping of LDAP schema to Evergreen
attributes further but patience, patience.

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/patron-load/ldap_osrf_sync [new file with mode: 0644]

diff --git a/tools/patron-load/ldap_osrf_sync b/tools/patron-load/ldap_osrf_sync
new file mode 100644 (file)
index 0000000..f2855b3
--- /dev/null
@@ -0,0 +1,311 @@
+#!/usr/bin/env python
+
+"""
+Synchronize Evergreen user accounts with an LDAP directory via OpenSRF 
+
+LDAP and OpenSRF authentication information is stored in a separate Python
+file (credentials.py) and imported to avoid storing credentials in the VCS.
+
+1. Pull a list of new LDAP records since the last sync from the LDAP
+   directory using the filter (createTimestamp>=time).
+
+2. For each new LDAP record, check to see if the record exists in Evergreen
+   (matching on ident_value)
+   
+   If not, create a new account with barcode.
+
+3. Dump the output of new ident_value + barcode in CSV format somewhere.
+"""
+
+import sys
+import ldap
+
+import oils.event
+import oils.utils.idl
+import oils.utils.utils
+import osrf.gateway
+import osrf.json
+import tempfile
+import urllib2
+
+import credentials
+
+def load_idl():
+    """
+    Loads the fieldmapper IDL, registering class hints for the defined objects
+
+    The IDL is downloaded on every call from the URL built out of
+    credentials.OSRF_HTTP, credentials.OSRF_HOST and credentials.IDL_URL
+    and stored in a temporary file, to ensure that the IDL is in sync
+    with the target server. One could use a HEAD request to do some
+    smarter caching, perhaps.
+
+    Raises urllib2.URLError when the IDL cannot be fetched and IOError
+    when it cannot be written to the temporary file; in both cases a
+    message is printed to stderr before the exception propagates.
+    """
+
+    parser = oils.utils.idl.IDLParser()
+    # Removed automatically once the file object is closed or collected
+    idlfile = tempfile.TemporaryFile()
+
+    idl_url = '%s://%s/%s' % (
+        credentials.OSRF_HTTP, credentials.OSRF_HOST, credentials.IDL_URL
+    )
+
+    # Get the fm_IDL.xml file from the server
+    try:
+        idl = urllib2.urlopen(idl_url)
+        try:
+            idlfile.write(idl.read())
+        finally:
+            # Release the HTTP connection even if the write fails
+            idl.close()
+        # rewind to the beginning of the file
+        idlfile.seek(0)
+
+    except urllib2.URLError, exc:
+        # Only HTTPError carries .code; a plain URLError has .reason, so
+        # format the exception itself instead of assuming an attribute.
+        print >> sys.stderr, "Could not open URL to read IDL: %s" % exc
+        # Re-raise: carrying on would hand the parser an empty file
+        raise
+
+    except IOError, exc:
+        print >> sys.stderr, "Could not write IDL to file: %s" % exc
+        raise
+
+    # parse the IDL
+    parser.set_IDL(idlfile)
+    parser.parse_IDL()
+
+def login(username, password, workstation=None):
+    """
+    Authenticate against open-ils.auth and return the session authtoken
+
+    Two gateway calls are made: authenticate.init returns a seed for the
+    user, and authenticate.complete receives the username, the value
+    md5sum(seed + md5sum(password)), the workstation and the login type
+    'staff'.
+
+    Raises AuthException (carrying the event's text code) when the
+    response parses as an unsuccessful event.
+    """
+
+    print("attempting login with user " + username)
+
+    # Step 1: obtain the seed for this user
+    init_request = request(
+        'open-ils.auth', 'open-ils.auth.authenticate.init', username)
+    seed = init_request.send()
+
+    # Step 2: send the seeded hash, never the plain password
+    md5sum = oils.utils.utils.md5sum
+    hashed = md5sum(seed + md5sum(password))
+    login_args = {
+        'workstation' : workstation,
+        'username' : username,
+        'password' : hashed,
+        'type' : 'staff'
+    }
+    response = request(
+        'open-ils.auth',
+        'open-ils.auth.authenticate.complete',
+        login_args).send()
+
+    event = oils.event.Event.parse_event(response)
+    if event and not event.success:
+        raise AuthException(event.text_code)
+
+    return response['payload']['authtoken']
+
+def request(service, method, *args):
+    """
+    Build (but do not send) a JSON request for the OpenSRF gateway
+
+    service and method name the OpenSRF call; any further positional
+    arguments are passed through as the method's parameters. The caller
+    invokes send() on the returned request object.
+
+    This is as simple as it gets. Atomic requests will require a bit
+    more effort.
+    """
+
+    gateway_request = osrf.gateway.JSONGatewayRequest(service, method, *args)
+
+    # Pointing at the configured gateway URL ensures JSON v1, not v0
+    gateway_request.setPath(credentials.GATEWAY_URL)
+
+    return gateway_request
+
+class AuthException(Exception):
+    """
+    Exceptions for authentication events
+
+    The message is available both as the msg attribute and through the
+    standard args tuple, so repr(), pickling and generic handlers that
+    inspect args see it as well.
+    """
+
+    def __init__(self, msg=''):
+        """
+        Initialize the authentication exception
+
+        msg: text describing the failure (login() passes the event's
+        text code)
+        """
+        # Hand the message to the base class so that args is populated
+        Exception.__init__(self, msg)
+        self.msg = msg
+
+    def __str__(self):
+        """
+        Stringify the authentication exception
+        """
+        # Wrap in a tuple so that a tuple-valued msg cannot break formatting
+        return 'AuthException: %s' % (self.msg,)
+
+def datatel_to_barcode(datatel):
+    """
+    Build the legacy barcode that corresponds to a Datatel Colleague ID
+
+    The ID is prefixed with '000070', and the mod10 check digit of that
+    prefixed string is appended as the final character.
+
+    Used only for matching legacy barcodes for the purposes of updates.
+    New users will get a barcode generated for them from a database series.
+
+    >>> datatel_to_barcode('0104923')
+    '00007001049233'
+    """
+
+    unchecked = '000070%s' % datatel
+    check_digit = mod10_checksum(unchecked)
+
+    return '%s%d' % (unchecked, check_digit)
+
+def barcode_to_datatel(barcode):
+    """
+    Extract the Datatel Colleague ID embedded in a 14-character barcode
+
+    Returns characters 7 through 13 of the barcode (the part between the
+    six-character prefix and the trailing check digit), or False when
+    the barcode is not exactly 14 characters long.
+
+    Used to generate the ident_value for legacy users.
+
+    >>> barcode_to_datatel('00007001049233')
+    '0104923'
+    """
+
+    if len(barcode) == 14:
+        return barcode[6:13]
+
+    return False
+
+def mod10_checksum(barcode):
+    """
+    Calculates the mod10 checksum for a given string of digits
+
+    Counting from 1 at the left, every digit in an odd position is
+    doubled (and reduced by 9 if the doubled value reaches 10); digits
+    in even positions are taken as they are. The check digit is the
+    amount needed to bring the sum up to the next multiple of 10, or 0
+    when the sum already is one.
+
+    According to the original author, this checksum algorithm is used
+    for Code 3 of 9 barcodes.
+    """
+
+    total = 0
+    for index, char in enumerate(barcode):
+        value = int(char)
+        # index is 0-based, so an even index is an odd 1-based position
+        if index % 2 == 0:
+            value *= 2
+            if value >= 10:
+                value -= 9
+        total += value
+
+    return (10 - total % 10) % 10
+
+def find_new_ldap_users(con, attributes, create_date, ldap_filter=None):
+    """
+    Retrieve personnel accounts from LDAP directory and process'em
+
+    con: LDAP connection object; only search() and result() are used
+    attributes: list of attribute names to request for each entry
+    create_date: date string such as '20110701'; the default filter
+        matches entries whose createTimestamp is at or after midnight
+        (000000Z) on that date
+    ldap_filter: optional LDAP filter string that replaces the default,
+        e.g. '(&(lulStudentLevel=*))' to select on student level
+        regardless of creation date
+
+    The attribute dictionary of every entry returned by the search is
+    passed to create_evergreen_user(). LDAP errors are printed to stderr
+    and not re-raised.
+    """
+    base_dn = 'o=lul'
+    search_scope = ldap.SCOPE_SUBTREE
+
+    # Previously two filters were built and then overwritten by a third,
+    # which left create_date unused; the date-based filter is now the
+    # default and any other filter has to be passed in explicitly.
+    if ldap_filter is None:
+        ldap_filter = (
+            '(&(objectclass=lulEduPerson)(lulPrimaryAffiliation=*)'
+            '(createTimestamp>=%s000000Z))' % create_date
+        )
+
+    try:
+        result_id = con.search(base_dn, search_scope, ldap_filter, attributes)
+        while True:
+            # Second argument 0: fetch one result message at a time
+            result_type, result_data = con.result(result_id, 0)
+            if not result_data:
+                break
+            # Skip anything that is not an entry (e.g. search references),
+            # whose payload is not an attribute dictionary
+            if result_type != ldap.RES_SEARCH_ENTRY:
+                continue
+            for _dn, entry in result_data:
+                # dump_data(result_data)
+                create_evergreen_user(entry)
+    except ldap.LDAPError, e:
+        print >> sys.stderr, e
+
+def create_evergreen_user(result_data):
+    """
+    Build an Evergreen user ('au') object from one LDAP entry
+
+    result_data is the attribute dictionary of a single LDAP entry: each
+    key maps to a list of values, of which only the first is used.
+
+    Returns the populated object, or None (after printing a message to
+    stderr) when one of the required attributes mail, sn or
+    lulColleagueId is missing. Nothing is sent to Evergreen here.
+    """
+
+    # Bail out early rather than fail with a KeyError part-way through
+    for required in ('mail', 'sn', 'lulColleagueId'):
+        if required not in result_data:
+            print >> sys.stderr, '%s not found for %s' % (
+                required, result_data.get('cn')
+            )
+            return None
+
+    newau = oils.utils.idl.IDLParser.get_class('au')()
+    newau.isnew(True)
+
+    # Strip leading/ending whitespace
+    email = result_data['mail'][0].strip().lower()
+    newau.usrname(email)
+    newau.email(email)
+    # NOTE(review): the family name is lowercased while the given name
+    # is not -- confirm that this is intended.
+    newau.family_name(result_data['sn'][0].strip().lower())
+
+    if 'givenName' in result_data:
+        newau.given_name(result_data['givenName'][0].strip())
+    else:
+        newau.given_name('LDAP_NULL')
+        print >> sys.stderr, 'No givenName for %s' % (email)
+
+    ident = result_data['lulColleagueId'][0].strip().lower()
+    if len(ident) != 7:
+        print >> sys.stderr, 'Datatel number not 7 chars for %s (%s)' % (
+            email, ident
+        )
+        # Left-pad 5- and 6-character IDs with zeroes. This used to go
+        # through a 'datatel' accessor, although the value being checked
+        # is ident_value.
+        if len(ident) in (5, 6):
+            ident = ident.zfill(7)
+    newau.ident_value(ident)
+
+    # TODO: preferredLanguage is requested from LDAP but not yet mapped
+    # onto the user object.
+
+    if 'lulStudentLevel' in result_data:
+        affiliation = result_data['lulStudentLevel'][0].strip().lower()
+    elif 'lulPrimaryAffiliation' in result_data:
+        affiliation = result_data['lulPrimaryAffiliation'][0].strip().lower()
+    else:
+        affiliation = None
+
+    # LDAP affiliation -> Evergreen profile ID (presumably site-specific
+    # permission group IDs -- confirm against the target database)
+    profiles = {
+        'ug': 13,
+        'student': 13,
+        'gr': 12,
+        'al': 14,
+        'faculty': 11,
+        'staff': 15,
+    }
+    if affiliation in profiles:
+        newau.profile(profiles[affiliation])
+    else:
+        print >> sys.stderr, 'No profile mapping for %s (%s)' % (
+            email, affiliation
+        )
+
+    return newau
+
+def dump_data(result_data):
+    """
+    Simple dump of all data received
+
+    result_data is a list whose first item is a (dn, attributes) pair.
+    Prints a blank line, the dn, and then one 'name value' line per
+    attribute to stdout.
+    """
+
+    dn, attrs = result_data[0][0], result_data[0][1]
+
+    # Each print gets exactly one string argument: without the
+    # print_function import, Python 2 treats print() and print(a, b) as
+    # printing a tuple, which showed up as '()' and "('a', 'b')".
+    print('')
+    print(dn)
+    for key in attrs:
+        print('%s %s' % (key, attrs[key]))
+
+def generate_ldap_sql(create_date):
+    """
+    Bind to the LDAP directory and process the accounts found there
+
+    Connects to credentials.LDAP_HOST with referral chasing disabled,
+    binds as credentials.LDAP_DN and hands the connection, the list of
+    wanted attributes and create_date to find_new_ldap_users().
+
+    On an LDAP error the details are printed to stderr and the process
+    exits with status 1. The connection is unbound in every case.
+    """
+
+    con = ldap.initialize(credentials.LDAP_HOST)
+    con.set_option(ldap.OPT_REFERRALS, 0)
+
+    attributes = [
+        'lulStudentLevel', 'lulPrimaryAffiliation', 'cn', 'mail',
+        'givenName', 'sn', 'lulColleagueId', 'preferredLanguage'
+    ]
+
+    try:
+        con.simple_bind_s(credentials.LDAP_DN, credentials.LDAP_PW)
+        find_new_ldap_users(con, attributes, create_date)
+    except ldap.LDAPError, e:
+        # The first argument is usually a dict with 'desc' and/or 'info',
+        # but neither key is guaranteed; check before indexing so that
+        # the handler cannot itself raise and hide the real error.
+        detail = e.args[0] if e.args else None
+        if isinstance(detail, dict):
+            parts = [str(detail[key]) for key in ('info', 'desc')
+                     if key in detail]
+        else:
+            parts = []
+
+        if parts:
+            print >> sys.stderr, "Could not connect: " + parts[0]
+            for extra in parts[1:]:
+                print >> sys.stderr, extra
+        else:
+            print >> sys.stderr, "Could not connect: %s" % e
+
+        # Non-zero status so that callers such as cron can see the failure
+        sys.exit(1)
+    finally:
+        con.unbind()
+
+if __name__ == '__main__':
+    import doctest
+
+    # Without --sync the script only runs its doctests, as before; the
+    # exit status now reports whether any of them failed. Previously an
+    # unconditional exit() made everything below unreachable.
+    if '--sync' not in sys.argv[1:]:
+        failed = doctest.testmod()[0]
+        sys.exit(1 if failed else 0)
+
+    # Set the host for our requests
+    osrf.gateway.GatewayRequest.setDefaultHost(credentials.OSRF_HOST)
+
+    # Pull all of our object definitions together
+    load_idl()
+
+    # Log in and get an authtoken
+    # NOTE(review): the authtoken is not passed on to anything yet
+    authtoken = login(credentials.OSRF_USER, credentials.OSRF_PW)
+
+    generate_ldap_sql('20110701')
+
+# vim: et:ts=4:sw=4:tw=78: