From 3530c2976187ce2717b71bb40c003e96b1783038 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Tue, 1 Sep 2015 16:41:37 -0400 Subject: [PATCH] JBAS-886 Student CSV importer script See --help for options. Signed-off-by: Bill Erickson --- .../import_students/generate-patrons-from-csv.pl | 719 +++++++++++++++++++++ 1 file changed, 719 insertions(+) create mode 100755 KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl diff --git a/KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl b/KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl new file mode 100755 index 0000000000..9b1734fc06 --- /dev/null +++ b/KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl @@ -0,0 +1,719 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use DBI; +use Text::CSV; +use DateTime; +use Data::Dumper; +use Getopt::Long; +use DateTime; +use Sys::Syslog qw(syslog openlog); + +# globals -- +my $syslog_facility = 'LOCAL6'; # matches Evergreen gateway +my $syslog_ops = 'pid'; +my $profile = 901; # "Student Ecard" +my $net_access = 101; # No Access +my $ident_type = 101; # "Sch-district file" ident type +my $syslog_ident = 'ECARD'; +my $alert_msg = 'Student Ecard: No physical checkouts. No computer/printing. No laptops.'; +my $alert_type = 20; # "Alerting note, no Blocks" standing penalty +my $root_org = 1; # KCLS org unit for penalty application +my $db_handle; +my %new_barcodes; +my @failures; # one row per student account that failed for any reason + +# summary output file is generated from these numbers +my %summary_stats = ( + total => 0, # total # students in CSV file + trans => 0, # count of students whose data was successfully translated + create => 0, # number successfully created + update => 0, # number successfully updated + delete => 0, # number successfully deleted + trans_err => 0, + create_err => 0, + update_err => 0, + delete_err => 0 +); + +# pre-prepared sql query statement handles; +my $create_user_sth; +my $create_addr_sth; +my $create_card_sth; +my $create_alrt_sth; +my $create_link_sth; + + +# run-time options -- +my $home_ou; # home org unit id +my $district_code; # 3-char school district code +my $default_pass; +my $commit_mode = 'rollback'; # single xact, then rollback (for testing) +my $assume_new = 0; +my $purge_all = 0; +my $out_dir = '.'; +my $log_stdout = 0; # copy logs to stdout +my $help = 0; # show help message if true +my $db_user = 'evergreen'; +my $db_host = 'localhost'; +my $db_name = 'evergreen'; +my $db_port = 5432; +my $db_pass; +my $csv_file; # cmd line option after flags + +GetOptions( + 'home-ou=s' => \$home_ou, + 'district-code=s' => \$district_code, + 'commit-mode=s' => \$commit_mode, + 'assume-new' => \$assume_new, + 'purge-all' => \$purge_all, + 'default-pass=s' => \$default_pass, + 'log-stdout' => \$log_stdout, + 'out-dir=s' => \$out_dir, + 'db-user=s' => \$db_user, + 'db-host=s' => \$db_host, + 'db-name=s' => \$db_name, + 'db-port=i' => \$db_port, + 'db-pass=s' => \$db_pass, + 'help' => \$help +); + +sub help { + print < + +Options: + + --home-ou + Org unit ID of the home library used for all patrons loaded + in the current file. + + --district-code + 3-character school district code. + + --default-pass + If set, use this password instead of the last 4 of the student ID. + + --commit-mode [rollback] + Sets the commit mode, which is used to determine when the SQL + script will issue database BEGIN/COMMIT pairs. + + Values: + + "batch" -- Insert all patrons in a single database transaction. + This is the default and it's useful for testing, + since a bad batch can be rolled back en masse. + + "rollback" -- Same as batch, but instead of issuing a "commit" after + all updates are executed, issue a "rollback". + + "each" -- Insert each patron within its own transaction. Used + for production deployments to ensure that all patrons + that insert without failure are added to the DB. + + --assume-new + Assume all users in the CSV file are new users. No checks will + be made to see if the user already exists in the DB. + + --purge-all + Ignore any commands in the CSV file and purge (permanently delete) + every user represented in the file. This is useful for testing. + The user will be prompted to continue. USE WITH CAUTION. + + --out-dir + Output directory for status files. Defaults to current working + directory. + + --log-stdout + Copy log messages to STDOUT (in addition to syslog). + Warnings and errors are always copied to STDERR. + + --db-user [evergreen] + --db-host [localhost] + --db-name [evergreen] + --db-port [5432] + --db-pass + Database connection options. + + --help + Show this message +HELP + exit; +} + +# options for level match syslog options: DEBUG,INFO,WARNING,ERR +sub announce { + my ($level, $msg, $die) = @_; + syslog("LOG_$level", $msg); + + my $date_str = DateTime->now->strftime('%F %T'); + my $msg_str = "$date_str [$$] $level $msg\n"; + + if ($die) { + die $msg_str; + + } else { + if ($level eq 'ERR' or $level eq 'WARNING') { + # always copy problem messages to stdout + warn $msg_str; # avoid dupe messages + } elsif ($log_stdout) { + print $msg_str; + } + } +} + +# ensure command line options are sane +sub verify_options { + help() if $help; # exits + + die "--home-ou required\n" unless $home_ou; + die "--district-code required\n" unless $district_code; + die "CSV file required\n" unless $csv_file = $ARGV[0]; + + die "Valid --commit-mode values are 'batch', 'rollback', and 'each'\n" + unless $commit_mode =~ /^(batch|rollback|each)$/; + + if ($purge_all) { + print "\nPurge every user in the CSV file? This is irreversible!\n"; + print "Are you sure? [yes|no]\n"; + my $resp = ; + chomp($resp); + die "Exiting\n" unless $resp eq 'yes'; + } +} + +sub connect_db { + $db_handle = DBI->connect( + "dbi:Pg:db=$db_name;host=$db_host;port=$db_port", + $db_user, $db_pass, { + RaiseError => 1, + PrintError => 0, + AutoCommit => 0, + pg_expand_array => 0, + pg_enable_utf8 => 1 + } + ) or announce('ERR', + "Connection to database failed: $DBI::err : $DBI::errstr", 1); +} + +# handler is sub that takes a single argument, the student row hash +sub iterate_csv_rows { + my $row_handler = shift; + my $csv = Text::CSV->new; + my $fh; + + open($fh, $csv_file) or + announce('ERR', "Unable to open CSV file: '$csv_file' : $!", 1); + + binmode($fh, ":utf8"); + + my $header = readline($fh); + if (!$csv->parse($header)) { + announce('ERR', "Unable to parse CSV header: $header", 1); + } + + $csv->column_names($csv->fields); + + while (my $phash = $csv->getline_hr($fh)) { + $phash->{barcode} = $district_code . $phash->{student_id}; + $row_handler->($phash); + } + + if (!$csv->eof) { + announce('ERR', + "CSV parsing exited prematurely: " . $csv->error_diag()); + } + + $fh->close; +} + +# Determine which patrons in the file are new by testing whether their +# barcode exists in EG. This batch version is used when we have no +# per-row actions (create, update, delete) in the CSV file to clarify +# which patrons are new. +sub find_new_patrons { + return if $assume_new or $purge_all; # no lookup required. + + my @all_barcodes; + + my $row_handler = sub { + my $phash = shift; + push(@all_barcodes, $phash->{barcode}); + }; + + iterate_csv_rows($row_handler); + + announce('INFO', "File contains ".scalar(@all_barcodes)." total students"); + + my $all_barcodes = join(',', @all_barcodes); + + # generate the SQL to determine which barcodes don't exist + my $SQL = <selectall_arrayref($SQL); + + announce('INFO', "New barcodes query returned ". + scalar(@$new_barcodes) ." new barcodes"); + + # hash-ify for faster lookup when processing each patron + %new_barcodes = map {$_->[0] => 1} @$new_barcodes; +} + +# expire date is set to July 1 after the patron's 18th birthday. +sub set_expire_date { + my $phash = shift; + + my ($year, $mon, $day) = ($phash->{dob} =~ /(\d{4})-(\d{2})-(\d{2})/); + my $now_year = DateTime->now->year; + my $expire_year = $now_year + (18 - ($now_year - $year)); + + # if dob occurs after july 1, expire date will occur the following year. + $expire_year++ if ($mon > 7) or ($mon == 7 and $day > 1); + + my $expire_date = DateTime->new( + year => $expire_year, month => 7, day => 1, time_zone => 'local'); + + $phash->{expire_date} = $expire_date; +} + +# Fills in gaps and massages data in the CSV hash. +# Returns 1 on successful translation, undef on error. +sub translate_patron_data { + my $phash = shift; + my $barcode = $phash->{barcode}; + + # keep a copy of the source data for the patron for + # reporting if translation fails. + my $orig_phash = { map {$_ => $phash->{$_}} keys %$phash }; + + my @errors = translate_patron_fields($phash); + return 1 unless scalar(@errors); + + $orig_phash->{err_msg} = join(';', @errors); + $orig_phash->{err_type} = 'trans'; + push(@failures, $orig_phash); + $summary_stats{trans_err}++; + return 0; +} + +# Returns undef on success, list of error message on failure +sub translate_patron_fields { + my $phash = shift; + my $barcode = $phash->{barcode}; + my @errors; + + # no field should have leading/trailing spaces + foreach (keys %$phash) { + $phash->{$_} =~ s/(^\s*|\s*$)//g if $phash->{$_}; + } + + # check required fields + for my $field (qw/student_id first_given_name family_name dob/) { + push(@errors, "No data present for required field $field") + unless $phash->{$field}; + } + + if ($phash->{dob} =~ m|\d{1,2}/\d{1,2}/\d{4}|) { + # dob is encoded in American-style month/day/year + # Translate it to ISO8601 + my ($mon, $day, $year) = + $phash->{dob} =~ m|(\d{1,2})/(\d{1,2})/(\d{4})|; + $phash->{dob} = sprintf("%s-%02d-%02d", $year, $mon, $day); + } + + push(@errors, "Invalid dob") + unless $phash->{dob} =~ m/^\d{4}-\d{2}-\d{2}$/; + + if (my $dp = $phash->{day_phone}) { + + if ($dp =~ /^\(?\d{3}\)?-?\s*-?\s*$/) { + # if the day_phone only contains an area code, treat it as unset. + $dp = undef; + + } elsif ($dp =~ /^\(?(\d{3})\)?[- \.](\d{3})[- \.](\d{4})$/) { + + # Supported phone formats: + # XXX-YYY-ZZZZ / (XXX) YYY-ZZZZ / XXX.YYY.ZZZZ + + # normalize the phone to XXX-YYY-ZZZZ + $dp =~ s/[\(\)]//g; # strip parens + $dp =~ s/[ \.]/-/g; + + $phash->{day_phone} = $dp; + + } else { + # Phone exists, is more than an area code, but is otherwise + # malformed. Kick it back for repairs. + push(@errors, "Invalid day_phone"); + } + } + + # we cannot continue if errors have occured at this stage. + return @errors if @errors; + + # password uses the last 4 characters of the student ID, + # unless a default password is set. + $phash->{passwd} = $default_pass ? $default_pass : + substr($phash->{student_id}, -4, 4); + + set_expire_date($phash); + + # most text fields should be upper-case + for (qw/first_given_name second_given_name + family_name street1 street2 city state county country/) { + $phash->{$_} = uc($phash->{$_}) if $phash->{$_}; + } + + # Replace 'AV' with 'AVE', but only when "AV" is surrounded by space + # period, or end of line, so as not to clobber names that contain AV. + if (my $s1 = $phash->{street1}) { + $s1 =~ s/\s+AV(\s|\.|$)+/ AVE /g; + $s1 =~ s/(^\s*|\s*$)//g; + $phash->{street1} = $s1; + } + + # Our policy is to include the apartment / unit number in the + # stree1 value. If street2 starts with APT or UNIT, append it + # onto the end of street1 (and clear street2). + # We also replace any occurrence of APT or UNIT with a '#'. + if (my $s2 = $phash->{street2}) { + if ($s2 =~ /^(APT|UNIT|#)/) { + $s2 =~ s/^(APT\.?|UNIT\.?)//g; # remove APT / UNIT + $s2 =~ s/^\s*//g; # trim leading space + if ($s2 =~ /^#/) { + # if the addr starts with a #, ensure it's followed by a space + $s2 =~ s/^#/# /g if $s2 =~ /^#[^\s]/; + } else { + # if no '#' is present to replace APT/UNIT, add it. + $s2 = "# $s2" unless $s2 =~ /^#/; + } + + # remove random "," "." "-" and extra spaces that + # occur after the initial "#". + $s2 =~ s/^#[\s,\.-]*(.*)$/# $1/g; + + if ($phash->{street1}) { + $phash->{street1} .= " $s2"; + } else { + $phash->{street1} = $s2; + } + $phash->{street2} = undef; + } + } + + # set required-but-missing address fields to NONE + for my $field (qw/street1 city post_code/) { + next if $phash->{$field}; + $phash->{$field} = 'NONE'; + } + + # apply some default values + $phash->{state} ||= 'WA'; + $phash->{country} ||= 'USA'; + $phash->{within_city_limits} ||= 'f'; + + return @errors; +} + +sub process_each_patron { + + my $row_handler = sub { + my $phash = shift; + $summary_stats{total}++; + + if ($purge_all) { + # Purging overrides all other actions and requires no + # data translation. + purge_patron($phash); + return; + } + + return unless translate_patron_data($phash); + $summary_stats{trans}++; + + if ($assume_new or $new_barcodes{$phash->{barcode}}) { + create_patron($phash); + } else { + update_patron($phash); + } + }; + + iterate_csv_rows($row_handler); + + if ($commit_mode eq 'rollback') { + announce('INFO', "Rolling back batch transaction"); + eval { $db_handle->rollback }; + + } elsif ($commit_mode eq 'batch') { + announce('INFO', "Committing batch transaction"); + eval { $db_handle->commit }; + } +} + +sub purge_patron { + my $phash = shift; + my $bc = $phash->{barcode}; + + my $user_id = $db_handle->selectrow_array( + "SELECT usr FROM actor.card WHERE barcode = '$bc'"); + + if (!$user_id) { + announce('DEBUG', "No user to purge with barcode $bc"); + return 0; + } + + announce('DEBUG', "Purging user $bc with ID $user_id"); + + eval { + $db_handle->selectrow_array( + "SELECT actor.usr_delete($user_id, NULL)"); + }; + + if (my $err = $@) { + + if ($commit_mode eq 'each') { + announce('WARNING', + "Unable to purge $bc (ID $user_id) : $err"); + + } else { # batch + announce('ERR', + "Unable to purge $bc (ID $user_id) : $err", 1); + } + + } else { + + announce('DEBUG', "User $bc (ID $user_id) successfully purged"); + eval { $db_handle->commit } if $commit_mode eq 'each'; + } +} + +# prepare our SQL once at startup. +sub prepare_patron_sql { + + $create_user_sth = $db_handle->prepare(<prepare(<prepare(<prepare(<prepare(<{barcode}; + + my @user_bind = ( + 't', $profile, $ident_type, $barcode, + $home_ou, $net_access, + $phash->{passwd}, + $phash->{student_id}, + $phash->{first_given_name}, + $phash->{second_given_name}, + $phash->{family_name}, + $phash->{expire_date}, + $phash->{dob}, + $phash->{email}, + ); + + my @addr_bind = ( + $phash->{street1}, + $phash->{street2}, + $phash->{city}, + $phash->{post_code}, + $phash->{state}, + $phash->{county}, + $phash->{country} || 'USA', + $phash->{within_city_limits} + ); + + my @card_bind = ($barcode); + + my @alrt_bind = ( + $root_org, + $alert_type, + $alert_msg + ); + + return unless handle_insert($phash, $create_user_sth, \@user_bind); + return unless handle_insert($phash, $create_addr_sth, \@addr_bind); + return unless handle_insert($phash, $create_card_sth, \@card_bind); + return unless handle_insert($phash, $create_alrt_sth, \@alrt_bind); + return unless handle_insert($phash, $create_link_sth, [], 1); +} + +# returns 1 on success, undef on failure +sub handle_insert { + my ($phash, $sth, $bind_vars, $final) = @_; + my $barcode = $phash->{barcode}; + + my $rows; + eval { $rows = $sth->execute(@$bind_vars) }; + + if (my $err = $@) { + eval { $db_handle->rollback }; + + $summary_stats{create_err}++; + $phash->{err_msg} = $err; + $phash->{err_type} = 'create'; + push(@failures, $phash); + + if ($commit_mode eq 'each') { + announce('WARNING', "Failed to INSERT new patron $barcode : $err"); + + } else { # batch / rollback + # Transaction failed in batch mode. + # No other actions will succeed. Exit the script. + announce('ERR', "Batch transaction aborted on $barcode : $err", 1); + } + + return undef; + } + + if ($final and $rows > 0) { + # commit the current patron once all required rows are inserted. + announce('DEBUG', "Inserted new patron $barcode"); + eval { $db_handle->commit } if $commit_mode eq 'each'; + $summary_stats{create}++; + } + + return 1; +} + +sub update_patron { + my $phash = shift; + announce('INFO', "Modifying patron with barcode " . $phash->{barcode}); +} + +sub cleanup_db { + $create_user_sth->finish; + $create_addr_sth->finish; + $create_card_sth->finish; + $create_alrt_sth->finish; + $create_link_sth->finish; + $db_handle->disconnect; +} + +sub generate_result_files { + (my $file_pfx = $csv_file) =~ s|.*?([^/]+).csv$|$1|ig; + my $sum_file_name = "$out_dir/$file_pfx.sum.csv"; + my $err_file_name = "$out_dir/$file_pfx.err.csv"; + + # ------ SUMMARY FILE ------- + + open(SUM, ">:encoding(utf8)", $sum_file_name) or announce('ERR', + "Cannot create summary file $sum_file_name : $!", 1); + + # Summary CSV file is trivial. Safe to generate manually. + my $header = join(',', sort(keys(%summary_stats))); + my $body = join(',', (map {$summary_stats{$_}} sort(keys(%summary_stats)))); + + print SUM "$header\n$body\n" or announce('ERR', + "Error writing summary file $sum_file_name : $!", 1); + + close(SUM); + + # ------ ERROR FILE ------- + + my $err_file; + open($err_file, ">:encoding(utf8)", $err_file_name) + or announce('ERR', "Cannot create error file $err_file_name : $!", 1); + + my $csv = Text::CSV->new; + + my $first = 1; + for my $phash (@failures) { + my @keys = sort(keys(%$phash)); + + # first time through generate the CSV header row + if ($first) { + + $csv->print($err_file, \@keys) or announce('ERR', + "Error writing failures file $err_file_name : ". + $csv->error_diag); + + $first = 0; + } + + my @values = map { $phash->{$_} } @keys; + + $csv->print($err_file, \@values) or announce('ERR', + "Error writing failures file $err_file_name : ". + $csv->error_diag); + } + + close($err_file); +} + +# ------ execution starts here ----------- + +openlog($syslog_ident, $syslog_ops, $syslog_facility); +verify_options(); +connect_db(); +prepare_patron_sql(); +find_new_patrons(); +process_each_patron(); +cleanup_db(); +generate_result_files(); + -- 2.11.0