--- /dev/null
+#!/usr/bin/env python
+r"""
+Flag potential corrupted characters in MARC records
+
+This simple script prints the subfields whose repr() representation
+contains a \x escape, using a tab-delimited format: record number, tag,
+code, UTF-8 encoded value, and repr() value of the matching subfield.
+
+Usage: pass the path of the MARC file as the first command-line argument;
+when no argument is given, the built-in default file name is used.
+
+Shortcomings: if the subfield contains Unicode sequences (\u####) then the
+script assumes that they're valid and does not flag them.
+
+Note: this docstring is a raw string on purpose; in a regular string
+literal the bare \x above is an invalid escape and the file will not compile.
+"""
+
+import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, urllib2
+
+# Default input file; overridden by the first command-line argument, if any.
+marcfile = 'gibson_cppc-2011-05-24_362.mrc.utf8'
+if len(sys.argv) > 1:
+    marcfile = sys.argv[1]
+
+# 1-based number of the record currently being examined (0 before the loop).
+record_cnt = 0
+
+# The with block closes the input file even if reading a record fails.
+with open(marcfile, mode='rb') as marc_handle:
+    reader = pymarc.MARCReader(marc_handle, to_unicode=True)
+    for record_cnt, record in enumerate(reader, 1):
+        for field in record.get_fields():
+            # Only data fields are inspected; control fields are skipped.
+            if field.is_control_field():
+                continue
+
+            for subfield in field:
+                code, value = subfield[0], subfield[1]
+                value_repr = repr(value)
+
+                # A literal backslash-x in the repr() marks a character that
+                # repr() had to escape; those are the ones worth reviewing.
+                if r'\x' not in value_repr:
+                    continue
+
+                print "%d\t%s\t%s\t%s\t%s" % (
+                    record_cnt, field.tag, code, value.encode('utf8'), value_repr
+                )
+
#!/usr/bin/env python
-import ldap, sys
-
-con = ldap.initialize('ldap://142.51.1.188')
-
-#try:
-# con.start_tls_s()
-#except ldap.LDAPError, e:
-# print e.message['info']
-# if type(e.message) == dict and e.message.has_key('desc'):
-# print e.message['desc']
-# else:
-# print e
-# sys.exit()
-#
-
-dn = "uid=Libr_LDAP;ou=EMPL;o=LUL"
-pw = ""
-
-auth = con.simple_bind_s(dn, pw)
-print auth
+"""
+ldap_sync: create and update Evergreen accounts based on an LDAP directory
+
+LDAP authentication information is stored in a separate Python file and
+imported to avoid storing credentials in the VCS.
+
+Rough plan:
+
+1. Create new accounts
+ a. Pull new LDAP records since a given time from the LDAP directory
+ using the filter (createTimestamp>=$time) and insert into a
+ staging table with the following columns; included is a sample
+ mapping to the LU LDAP attributes:
+ * first_given_name (givenName)
+ * family_name (sn)
+ * ident_value (lulColleagueId)
+ * usrname (cn)
+ * language (preferredLanguage)
+ * profile (lulPrimaryAffiliation)
+ * datatel_barcode (datatel_to_barcode(lulColleagueId))
+ b. For each LDAP record, create a new library system account if it
+ does not already exist (check for matches based on usrname, email
+ address, datatel_barcode). Map LDAP attributes to account profile,
+ first and last names, email address.
+ * Set passwd to a randomly generated value; first time users can
+ reset via email
+ * Set ident_type = 2, ident_value = ident_value
+ * Set home_ou appropriately
+ * Set expire_date to next September for students, 20 years from
+ now (?) for faculty / staff
+ * Set preferred language stat cat
+ c. Create a new barcode for the user via a PostgreSQL routine; draw
+ the base number from a database series. We no longer want to use
+ barcodes based on the Datatel number. This routine should update
+ the actor.usr.card column with the appropriate card ID.
+2. Update existing accounts
+ a. If we found a match in 1(b), then update attributes accordingly:
+ * Set preferred language stat cat
+ * Update ident_type / ident_value to Datatel ID
+ * Set email address based on cn
+"""
+
+import sys
+import ldap, luauth
+
+def datatel_to_barcode(datatel):
+    """
+    Build the legacy barcode that corresponds to a Datatel Colleague ID
+
+    The barcode is the fixed prefix '000070', followed by the ID, followed
+    by the mod10 check digit computed over prefix + ID.
+
+    This exists only so that legacy barcodes can be matched during updates;
+    new users get a barcode generated from a database series instead.
+
+    >>> datatel_to_barcode('0104923')
+    '00007001049233'
+    """
+
+    unchecked = '000070%s' % (datatel)
+    check_digit = mod10_checksum(unchecked)
+
+    return '%s%d' % (unchecked, check_digit)
+
+def mod10_checksum(barcode):
+    """
+    Return the mod10 check digit for a string of digits
+
+    Counting from the left and starting at 1, every digit in an odd position
+    is doubled (subtracting 9 when the doubled value reaches 10 or more);
+    digits in even positions are added unchanged. The check digit is the
+    amount needed to bring the sum up to the next multiple of 10, or 0 when
+    the sum already is one.
+
+    This checksum algorithm is used for Code 3 of 9 barcodes.
+    """
+
+    running_sum = 0
+    for index, char in enumerate(barcode):
+        value = int(char)
+        # index is 0-based, so an even index is an odd 1-based position.
+        if index % 2 == 0:
+            value *= 2
+            if value >= 10:
+                value -= 9
+        running_sum += value
+
+    # The outer modulo maps the "already a multiple of 10" case (10) to 0.
+    return (10 - running_sum % 10) % 10
+
+def database_mod10():
+    """
+    Print the DDL for a PostgreSQL function that generates mod10 check digits
+
+    The statement written to standard output creates (or replaces)
+    evergreen.mod10(TEXT), a PL/Perl function whose body follows the same
+    steps as mod10_checksum() in this module. Nothing is executed against a
+    database here; the SQL is only printed.
+    """
+
+    ddl = """CREATE OR REPLACE FUNCTION evergreen.mod10(TEXT) RETURNS TEXT AS $$
+ use strict;
+ use warnings;
+
+ my $barcode = shift;
+ my $total = 0;
+ my $position = 0;
+ foreach my $digit (split('', $barcode)) {
+ $position++;
+ if ($position % 2) {
+ # Double it
+ $digit *= 2;
+ # If less than 10, add to the total
+ if ($digit < 10) {
+ $total += $digit;
+ } else {
+ $total += $digit - 9;
+ }
+ } else {
+ $total += $digit;
+ }
+ }
+ my $rem = $total % 10;
+ if ($rem) {
+ return 10 - $rem;
+ }
+ return $rem;
+$$ LANGUAGE PLPERLU STRICT IMMUTABLE;
+"""
+    print ddl
+
+def create_staging_table():
+    """
+    Print the SQL that (re)creates the user staging table
+
+    The output drops scratchpad.usr_staging if it exists and then creates it
+    with the TEXT columns usrname, family_name, first_given_name,
+    ident_value and lang. The SQL is only printed, not executed.
+    """
+
+    ddl = """
+DROP TABLE IF EXISTS scratchpad.usr_staging;
+CREATE TABLE scratchpad.usr_staging (usrname TEXT, family_name TEXT, first_given_name TEXT, ident_value TEXT, lang TEXT);
+"""
+    print ddl
+
+def search_for_students(con):
+    """
+    Print every lulEduPerson entry that has a primary affiliation
+
+    Starts a subtree search under o=lul, then fetches the results one at a
+    time and prints each entry's DN followed by one line per returned
+    attribute (attribute name, then its list of values).
+
+    con -- an LDAP connection object on which search() and result() can be
+           called; the caller is expected to have bound it already
+
+    Returns None. LDAP errors raised while searching are caught and printed
+    rather than propagated.
+    """
+    base_dn = 'o=lul'
+    search_scope = ldap.SCOPE_SUBTREE
+    attributes = ['lulPrimaryAffiliation', 'cn', 'mail', 'givenName', 'sn', 'lulColleagueId', 'preferredLanguage']
+    # Named search_filter so the filter() builtin is not shadowed.
+    search_filter = '(&(objectclass=lulEduPerson)(lulPrimaryAffiliation=*))'
+
+    try:
+        result_id = con.search(base_dn, search_scope, search_filter, attributes)
+        while True:
+            # all=0: hand back results one at a time instead of waiting for
+            # the complete result set.
+            _result_type, result_data = con.result(result_id, 0)
+            if not result_data:
+                # An empty payload marks the end of the search.
+                break
+
+            dn, attrs = result_data[0]
+            if dn is None:
+                # NOTE(review): python-ldap reports search continuation
+                # references as (None, [urls]); there are no attributes to
+                # print for those, so skip them instead of failing on the
+                # list payload.
+                continue
+
+            print dn
+            for key in attrs:
+                print key, attrs[key]
+    except ldap.LDAPError, e:
+        print e
+
+if __name__ == '__main__':
+    # Run the module's doctests first (datatel_to_barcode carries one).
+    import doctest
+    doctest.testmod()
+
+    # Connection details live in the separate luauth module so that
+    # credentials stay out of version control.
+    con = ldap.initialize(luauth.hostname)
+    con.set_option(ldap.OPT_REFERRALS, 0)
+
+    try:
+        con.simple_bind_s(luauth.dn, luauth.pw)
+        search_for_students(con)
+    except ldap.LDAPError, e:
+        # The error detail normally arrives as a dict with 'info' and/or
+        # 'desc' keys, but neither the dict nor the keys are guaranteed, so
+        # check before indexing to keep the handler itself from raising.
+        error_info = e.args[0] if e.args else None
+        details = error_info if isinstance(error_info, dict) else {}
+
+        print "Could not connect: " + details.get('info', '')
+        if 'desc' in details:
+            print details['desc']
+        else:
+            print e
+        # Exit with a non-zero status so callers can detect the failure.
+        sys.exit(1)
+    finally:
+        # Runs on success, on failure, and on the sys.exit() above.
+        con.unbind()
+
+# vim: et:ts=4:sw=4:tw=78: