JBAS-1437 Backstage processor script
author: Bill Erickson <berickxx@gmail.com>
Mon, 19 Dec 2016 17:32:37 +0000 (12:32 -0500)
committer: Bill Erickson <berickxx@gmail.com>
Thu, 21 Mar 2019 19:46:23 +0000 (15:46 -0400)
Script to fetch and process monthly and quarterly Backstage data.

Signed-off-by: Bill Erickson <berickxx@gmail.com>
KCLS/authority-control/backstage/README.adoc
KCLS/authority-control/backstage/backstage-agent.sh [new file with mode: 0755]
KCLS/authority-control/backstage/process-backstage-files.pl
KCLS/utility-scripts/CRONTAB

index 1db1005..247c59d 100644 (file)
@@ -2,17 +2,18 @@
 
 Perform steps as 'opensrf'
 
+TODO: update me when backstage-agent.sh is done.
+
 == Quarterly Export + Import ==
 
 === Setup ===
 
 [source,sh]
 --------------------------------------------------------------------
-export EXPORT_DATE=2016-10-01 # for example
+export EXPORT_DATE=2016-10-01
 export WORKING_DIR=/openils/var/data/authority-control/backstage/quarterly/$EXPORT_DATE
 export PGHOST=foo
 export PGPASSWORD=foo
-export PGUSER=evergreen
 mkdir -p $WORKING_DIR
 --------------------------------------------------------------------
 
@@ -58,7 +59,7 @@ cd /home/opensrf/Evergreen/KCLS/backstage/
     --export-date $EXPORT_DATE \
     --zip-file $WORKING_DIR/<result-file> \
     --working-dir $WORKING_DIR \
-    --bib-collision-file bib-collisions.mrc \
+    --reports-dir $REPORTS_DIR \ # TODO
     > $WORKING_DIR/process.log
 --------------------------------------------------------------------
 
diff --git a/KCLS/authority-control/backstage/backstage-agent.sh b/KCLS/authority-control/backstage/backstage-agent.sh
new file mode 100755 (executable)
index 0000000..b9fa3af
--- /dev/null
@@ -0,0 +1,245 @@
+#!/bin/bash
+# ----------------------------------------------------------------
+# Backstage file processing utility functions.
+# 
+# Environment variables BACKSTAGE_USER and BACKSTAGE_PASSWORD
+# must be set. 
+#
+# PGHOST and PGPASSWORD may also need to be set.
+# ----------------------------------------------------------------
+# Upload target used by bs_put_file().
+BACKSTAGE_PUT_URL="ftp://ftp.bslw.com/in"
+# TODO: determine pickup location of quarterly result files
+#BACKSTAGE_GET_URL="ftp://ftp.bslw.com/out"
+# XXX: this is where monthly auth files are delivered.
+BACKSTAGE_GET_URL="http://mars.bslw.com/ftpfiles/NTG"
+# Parent directories; bs_make_dirs() appends <type>/<YYYY-MM> to each.
+WORKING_DIR_BASE="/openils/var/data/authority-control/backstage"
+REPORTS_DIR_BASE="/openils/var/web/standalone/backstage"
+
+# see bs_make_dirs()
+REPORTS_DIR=""
+WORKING_DIR=""
+
+# Directory the script was started from.  bs_get_file() returns here
+# after downloading, and the perl helpers are invoked as ./<script>,
+# so the script is expected to be run from its own directory.
+SCRIPT_DIR=$PWD
+# curl command prefix shared by bs_get_file() and bs_put_file().
+# The credentials are captured when this line runs, and $CURL is
+# expanded unquoted at the call sites, so a user name or password
+# containing whitespace would be word-split.
+CURL="curl --silent --show-error --user $BACKSTAGE_USER:$BACKSTAGE_PASSWORD";
+
+# Date parts for "now"; used to build file and directory names.
+CUR_MONTH=$(date +'%m')
+CUR_YEAR2=$(date +'%y')
+CUR_YEAR4=$(date +'%Y')
+# Current quarter, 1-4.  %-m asks date for the month without zero
+# padding (GNU date flag), so the arithmetic never sees "08" or "09".
+CUR_QTR=$(( ($(date +%-m)-1)/3+1 ))
+# Filled in by bs_set_qtr_dates().
+PREV_QTR_START_DATE=""
+PREV_QTR_END_DATE=""
+
+# Print all arguments to stderr and exit the script with status 1.
+function die { echo "$@" 1>&2 ; exit 1; } # thanks, internet.
+
+# Confirm Backstage username and password are provided.
+# Dies (exit 1) when BACKSTAGE_USER or BACKSTAGE_PASSWORD is unset or
+# empty; otherwise reports which user will be used.
+function bs_check_creds {
+
+    # Testing the plain expansion rejects empty values as well as unset
+    # ones; a blank user or password is never a usable credential.
+    if [ -z "$BACKSTAGE_USER" ] || [ -z "$BACKSTAGE_PASSWORD" ]; then
+        die "ENV variables BACKSTAGE_USER and BACKSTAGE_PASSWORD required."
+    fi
+
+    echo "Connecting to backstage as user $BACKSTAGE_USER"
+}
+
+# Fetch a file from the Backstage server at BACKSTAGE_GET_URL.
+#   $1 FILE_NAME  name of the file on the Backstage server -- no path.
+#   $2 DEST_DIR   existing, writable local directory to store it in.
+# Skips the download when DEST_DIR already holds a file of that name.
+# Dies (exit 1) on missing arguments, an unusable directory, missing
+# credentials, or a failed transfer.
+function bs_get_file {
+    FILE_NAME="$1"
+    DEST_DIR="$2"
+
+    [ -z "$FILE_NAME" -o -z "$DEST_DIR" ] && \
+        die "bs_get_file() requires file name and destination directory"
+
+    [ ! -w "$DEST_DIR" ] && \
+        die "Destination directory is not writable: $DEST_DIR"
+
+    bs_check_creds;
+
+    # curl -O writes into the current directory, so go there,
+    # get the file, then return.
+    cd "$DEST_DIR" || die "Cannot change to directory: $DEST_DIR"
+
+    if [ -f "$FILE_NAME" ]; then
+        echo "Backstage file already retrieved: $FILE_NAME"
+
+    else
+        echo "Fetching Backstage file $FILE_NAME"
+
+        # --fail makes curl exit non-zero on HTTP errors (e.g. 404)
+        # instead of saving the server's error page as the ZIP file.
+        if ! $CURL --fail -O "$BACKSTAGE_GET_URL/$FILE_NAME"; then
+            # Drop any partial download so the next run retries rather
+            # than reporting the file as already retrieved.
+            rm -f "$FILE_NAME"
+            die "curl failed to retrieve file: $BACKSTAGE_GET_URL/$FILE_NAME"
+        fi
+    fi;
+
+    cd "$SCRIPT_DIR" || die "Cannot change to directory: $SCRIPT_DIR"
+}
+
+# Put a file onto the Backstage FTP server at BACKSTAGE_PUT_URL.
+#   $1 FILE_NAME  local file name.  May be relative to PWD or
+#                 path-qualified.
+# Dies (exit 1) on a missing argument, an unreadable file, missing
+# credentials, or a failed upload.
+function bs_put_file {
+    FILE_NAME="$1"
+
+    [ -z "$FILE_NAME" ] && \
+        die "bs_put_file() requires a file name"
+
+    # Fail with a clear message here instead of a curl read error later.
+    [ ! -r "$FILE_NAME" ] && \
+        die "Cannot read file to send: $FILE_NAME"
+
+    bs_check_creds;
+
+    echo "Putting Backstage file: $(ls -l "$FILE_NAME")"
+
+    if ! $CURL -T "$FILE_NAME" "$BACKSTAGE_PUT_URL/"; then
+        die "curl failed to send file: $BACKSTAGE_PUT_URL/$FILE_NAME"
+    fi
+}
+
+
+# Fetch the monthly authority update file and process the results.
+# The file name is built from the current 2-digit year and month
+# (NTG<yy><mm>N.zip).  Output of the processor is written to
+# process.log in the monthly working directory.
+# Dies (exit 1) if the download or the processor fails.
+function bs_import_monthly_auths {
+    bs_make_dirs "monthly"
+
+    FILE="NTG${CUR_YEAR2}${CUR_MONTH}N.zip"
+
+    bs_get_file "$FILE" "$WORKING_DIR"
+
+    echo "Processing file $WORKING_DIR/$FILE ..."
+
+    # Paths are quoted so unusual directory names survive word
+    # splitting; a non-zero exit from the processor is reported
+    # instead of being silently ignored.
+    perl ./process-backstage-files.pl \
+        --verbose \
+        --auth-only \
+        --zip-file "$WORKING_DIR/$FILE" \
+        --working-dir "$WORKING_DIR" \
+        --reports-dir "$REPORTS_DIR" \
+        > "$WORKING_DIR/process.log" \
+        || die "process-backstage-files.pl failed; see $WORKING_DIR/process.log"
+}
+
+# Fetch the quarterly result file and process it.
+# The export date is read from the EXPORT_DATE file that
+# bs_export_qtrly_bibs() leaves in the quarterly working directory.
+# That directory is named after the current year and month, so the
+# file is only found when the export ran in the same calendar month.
+# Dies (exit 1) if the export date is missing or if the download or
+# the processor fails.
+function bs_import_qtrly_results {
+    bs_set_qtr_dates;
+    bs_make_dirs "quarterly"
+
+    # Check for the file first so a missing export yields one clear
+    # message rather than a cat error followed by the die message.
+    [ -r "$WORKING_DIR/EXPORT_DATE" ] || \
+        die "No quarterly export data found at $WORKING_DIR/EXPORT_DATE"
+
+    EXPORT_DATE="$(cat "$WORKING_DIR/EXPORT_DATE")"
+
+    [ -z "$EXPORT_DATE" ] && \
+        die "No quarterly export data found at $WORKING_DIR/EXPORT_DATE"
+
+    # TODO file name?
+    FILE="NTG${CUR_YEAR2}${CUR_MONTH}N.zip"
+
+    bs_get_file "$FILE" "$WORKING_DIR"
+
+    echo "Processing file $WORKING_DIR/$FILE ..."
+
+    # A non-zero exit from the processor is reported instead of being
+    # silently ignored.
+    perl ./process-backstage-files.pl \
+        --verbose \
+        --export-date "$EXPORT_DATE" \
+        --zip-file "$WORKING_DIR/$FILE" \
+        --working-dir "$WORKING_DIR" \
+        --reports-dir "$REPORTS_DIR" \
+        > "$WORKING_DIR/process.log" \
+        || die "process-backstage-files.pl failed; see $WORKING_DIR/process.log"
+
+}
+
+# Export the previous quarter's bib records and upload them to
+# Backstage.  On success, writes today's date (YYYY-MM-DD) to
+# EXPORT_DATE in the quarterly working directory.
+# Dies (exit 1) if no export file is produced, if it is empty, or if
+# the upload fails.
+function bs_export_qtrly_bibs {
+    bs_set_qtr_dates;
+    bs_make_dirs "quarterly"
+
+    EXPORT_FILE="$WORKING_DIR/bib-export-qtrly.$CUR_YEAR4-$CUR_MONTH.mrc"
+
+    perl ./export-bibs.pl \
+        --start-date "$PREV_QTR_START_DATE" \
+        --end-date "$PREV_QTR_END_DATE" \
+        --out-file "$EXPORT_FILE"
+
+    [ -f "$EXPORT_FILE" ] || \
+        die "No MARC export file was created at $EXPORT_FILE"
+
+    # -s tests the file's byte size.  The allocated block count from
+    # stat is not a reliable emptiness test: some filesystems can
+    # report 0 blocks for a small file that does contain data.
+    [ -s "$EXPORT_FILE" ] || \
+        die "Empty bib export file created"
+
+    bs_put_file "$EXPORT_FILE"
+
+    # Put a file into the working directory with the export
+    # date so the importer can refer to it later.
+    echo "$(date +'%F')" > "$WORKING_DIR/EXPORT_DATE"
+}
+
+
+# Compute the first and last day of the quarter preceding the current
+# one and store them in PREV_QTR_START_DATE / PREV_QTR_END_DATE
+# (YYYY-MM-DD).  Reads CUR_QTR and CUR_YEAR4.
+function bs_set_qtr_dates {
+
+    YEAR=$CUR_YEAR4
+
+    # One row per current quarter: start month, end month and last
+    # day of the quarter being reported on.
+    case "$CUR_QTR" in
+        1)
+            # Processing records for Q4 of the previous year.
+            YEAR=$(( CUR_YEAR4 - 1 ))
+            START_MONTH=10
+            END_MONTH=12
+            END_DAY=31
+            ;;
+        3)
+            START_MONTH="04"
+            END_MONTH=6
+            END_DAY=30
+            ;;
+        4)
+            START_MONTH="07"
+            END_MONTH=9
+            END_DAY=30
+            ;;
+        *)
+            # Current quarter 2: report on January through March.
+            START_MONTH="01"
+            END_MONTH=3
+            END_DAY=31
+            ;;
+    esac
+
+    PREV_QTR_START_DATE="$YEAR-$START_MONTH-01"
+    PREV_QTR_END_DATE="$YEAR-$(printf '%0.2d' $END_MONTH)-$END_DAY"
+
+    echo "Prev quarter dates: $PREV_QTR_START_DATE..$PREV_QTR_END_DATE"
+}
+
+
+# Create working and report files directories.
+#   $1 TYPE  sub-directory name: "monthly" or "quarterly".
+# Sets WORKING_DIR and REPORTS_DIR to <base>/<TYPE>/<YYYY-MM> for the
+# current year and month, creating both if needed.
+# Dies (exit 1) when TYPE is missing or a directory is not writeable.
+function bs_make_dirs {
+    TYPE="$1" # monthly, quarterly
+
+    # Without a type the directories would be created directly under
+    # the base path with an empty path component.
+    [ -z "$TYPE" ] && \
+        die "bs_make_dirs() requires a type (monthly, quarterly)"
+
+    WORKING_DIR="$WORKING_DIR_BASE/$TYPE/$CUR_YEAR4-$CUR_MONTH"
+    REPORTS_DIR="$REPORTS_DIR_BASE/$TYPE/$CUR_YEAR4-$CUR_MONTH"
+
+    echo "Creating working directory: $WORKING_DIR"
+
+    mkdir -p "$WORKING_DIR"
+
+    # The writability test also catches a failed mkdir.
+    if [ ! -w "$WORKING_DIR" ]; then
+        die "Working directory is not writeable: $WORKING_DIR"
+    fi
+
+    echo "Creating reports directory: $REPORTS_DIR"
+
+    mkdir -p "$REPORTS_DIR"
+
+    if [ ! -w "$REPORTS_DIR" ]; then
+        die "Reports directory is not writeable: $REPORTS_DIR"
+    fi
+}
+
+# Print the option summary to stdout, then exit the script.
+# $0 in the heredoc expands to the name the script was invoked with.
+function usage {
+    cat <<USAGE
+        $0
+
+        Options
+            -a Import monthly authority update file.
+
+            -b Create and upload quarterly bib export
+            -q Process quarterly bib export results.
+
+            -h Show this help message.
+USAGE
+    exit;
+}
+
+
+# Run one action per option, in the order the options were given.
+# No arm matches an unrecognized option, so it runs nothing.
+while getopts "abqh" opt
+do
+    case "$opt" in
+        a)
+            bs_import_monthly_auths
+            ;;
+        b)
+            bs_export_qtrly_bibs
+            ;;
+        q)
+            bs_import_qtrly_results
+            ;;
+        h)
+            usage
+            ;;
+    esac
+done
+
+
+
index b29c6d8..8fc1a10 100755 (executable)
@@ -30,6 +30,8 @@ my $marc_file;
 my $zip_file;
 my $export_date;
 my $working_dir = '.',
+my $reports_dir;
+my $auth_only;
 my $verbose;
 my $bib_collision_file = 'bib-collisions.mrc'; # in --working-dir
 
@@ -51,7 +53,9 @@ GetOptions(
     'marc-file=s'   => \$marc_file,
     'zip-file=s'    => \$zip_file,
     'export-date=s' => \$export_date,
+    'auth-only'     => \$auth_only,
     'working-dir=s' => \$working_dir,
+    'reports-dir=s' => \$reports_dir,
     'verbose'       => \$verbose,
     'help'          => \$help
 );
@@ -72,6 +76,11 @@ Options
         specially when ingesting bib records produced by Backstage to
         avoid losing change made by staff since the export.
 
+    --auth-only
+        Forces the script to ignore any bib files its asked to process.
+        This also prevents the script from dying when no --export-date is
+        provide, since it only affects bib records.
+
     --file
         Full path to a single bib or authority MARC file.
 
@@ -90,11 +99,15 @@ $KU->verbose($verbose);
 $KU->syslog_ident('BACKSTAGE');
 
 $KU->announce('ERR', "required: --export-date YYYY-MM-DD", 1)
-    unless $export_date && $export_date =~ /^\d{4}-\d{2}-\d{2}$/;
+    unless $auth_only || 
+        ($export_date && $export_date =~ /^\d{4}-\d{2}-\d{2}$/);
 
 $KU->announce('ERR', "--marc-file or --zip-file required", 1) 
     unless ($marc_file || $zip_file);
 
+$KU->announce('ERR', "--reports-dir is not writeable", 1)
+    if $reports_dir && ! -w $reports_dir;
+
 # Log every occurrence of each event type.
 $log_mod = 1 if $verbose;
 
@@ -115,19 +128,37 @@ sub process_zip_file {
 
     # Start by locating the MARC files in the ZIP file
     # All of the MARC files end in .UTF8 or MRC.
-    for my $member ($zip->membersMatching('.*(\.UTF8|\.MRC)')) {
+    for my $member ($zip->members) {
 
         my $basename = basename($member->fileName());
 
-        $KU->announce('INFO', "Extracting file $basename");
+        if ($basename =~ /(\.UTF8|\.MRC)$/) {
+            $KU->announce('INFO', "Processing MARC file $basename");
 
-        my $local_file = "$working_dir/$basename";
+            my $local_file = "$working_dir/$basename";
 
-        $KU->announce('ERR', "Unable to extract to file: $local_file", 1)
-            unless $member->extractToFileNamed($local_file) == AZ_OK;
+            $KU->announce('ERR', "Unable to extract to file: $local_file", 1)
+                unless $member->extractToFileNamed($local_file) == AZ_OK;
+
+            if ($basename =~ /BIB/) {
+                if ($auth_only) {
+                    $KU->announce('WARNING', "Processing as --auth-only.  ".
+                    "Skipping bib file $local_file.");
+                } else {
+                    push(@{$marc_files{bib}}, $local_file);
+                }
+            } else {
+                push(@{$marc_files{auth}}, $local_file);
+            }
 
-        my $key = ($basename =~ /BIB/) ? 'bib' : 'auth';
-        push(@{$marc_files{$key}}, $local_file);
+        } elsif ($reports_dir) {
+            $KU->announce('INFO', "Copying file to reports dir $basename");
+
+            my $local_file = "$reports_dir/$basename";
+
+            $KU->announce('ERR', "Unable to extract to file: $local_file", 1)
+                unless $member->extractToFileNamed($local_file) == AZ_OK;
+        }
     }
 
     # Then process bib files first, followed by authority files.
index ede59bb..4cad848 100644 (file)
@@ -14,6 +14,8 @@ PGUSER     = evergreen
 PGDATABASE = evergreen
 # change for cluster install
 PGHOST     = localhost 
+BACKSTAGE_USER = BSUSER
+BACKSTAGE_PASSWORD = BSPASS
 # Uncomment on production to generate email alerts for certain actions
 # EG_UTIL_NOTIFY = 1