From 690776dcc8412ffde22110b0dd97d957c73df739 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Mon, 9 Jan 2017 10:43:17 -0500 Subject: [PATCH] Student importer script sanity checks 1. Skip rows in the CSV file that contain no data for student_id. 2. Avoid trying to create students whose barcode matches an existing username in addition to an existing barcode. 3. Avoid inspecting and translating data for non-new accounts when only importing new accounts. This reduces the work load of the script and allows it to complete faster. 4. Improve handling/logging for empty files 5. Exit with status 0 for calls to announce with the 'die' flag set to true. In other words, treat them as expected errors that fail gracefully without preventing the calling script (sftp-client-agent) from exiting on an expected error. 6. Strip non-alpha-numeric characters from student_id values Signed-off-by: Bill Erickson --- .../import_students/generate-patrons-from-csv.pl | 39 ++++++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl b/KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl index b6f418c1ac..45c7203156 100755 --- a/KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl +++ b/KCLS/utility-scripts/import_students/generate-patrons-from-csv.pl @@ -191,7 +191,8 @@ sub announce { my $msg_str = "$date_str [$$] $level $msg\n"; if ($die) { - die $msg_str; + warn $msg_str; + exit 0; } else { if ($level eq 'ERR' or $level eq 'WARNING') { @@ -248,19 +249,24 @@ sub iterate_csv_rows { binmode($fh, ":utf8"); - my $header = readline($fh); - if (!$csv->parse($header)) { - announce('ERR', "Unable to parse CSV header: $header", 1); - } + my $header = readline($fh) || ''; + + announce('ERR', "Unable to parse CSV header: '$header'", 1) + unless $header && $csv->parse($header); $csv->column_names($csv->fields); + my $skipped = 0; while (my $phash = $csv->getline_hr($fh)) { + if (!$phash->{student_id}) { $skipped++; next; } + + # Remove any non-alphanumeric characters. + $phash->{student_id} =~ s/[^\w]//oag; + # If the ID value is less than 4 characters, pad the value # with zeros to be 4 characters long. This will effect - # the barcode and password. - # Only do this for teacher accounts. + # the barcode and password. Only do this for teacher accounts. $phash->{student_id} = sprintf('%0.4d', $phash->{student_id}) if $is_teacher && length($phash->{student_id}) < 4; @@ -280,6 +286,8 @@ sub iterate_csv_rows { } $fh->close; + + announce('WARNING', "$skipped entries had no student_id") if $skipped; } # Determine which patrons in the file are new by testing whether their @@ -302,15 +310,20 @@ sub find_new_patrons { my $all_barcodes = join(',', @all_barcodes); - # generate the SQL to determine which barcodes don't exist + # Generate the SQL to determine which barcodes don't yet exist. + # Confirm the incoming barcode does not match an existing + # barcode or username, since either would cause an insert failure. my $SQL = <selectall_arrayref($SQL); @@ -491,6 +504,10 @@ sub process_each_patron { return; } + # NOTE: exit early for non-new barcodes since patron updates + # are not supported. This makes the script run faster. + return unless $new_barcodes{$phash->{barcode}}; + return unless translate_patron_data($phash); $summary_stats{trans}++; -- 2.11.0