From 486f5fd4f8fb53cbde8c066064d3d9925fe6f9da Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Mon, 19 Dec 2016 12:32:37 -0500 Subject: [PATCH] JBAS-1437 Backstage processor script Script to fetch and process monthly and quarterly Backstage data. Signed-off-by: Bill Erickson --- KCLS/authority-control/backstage/README.adoc | 7 +- .../authority-control/backstage/backstage-agent.sh | 245 +++++++++++++++++++++ .../backstage/process-backstage-files.pl | 47 +++- KCLS/utility-scripts/CRONTAB | 2 + 4 files changed, 290 insertions(+), 11 deletions(-) create mode 100755 KCLS/authority-control/backstage/backstage-agent.sh diff --git a/KCLS/authority-control/backstage/README.adoc b/KCLS/authority-control/backstage/README.adoc index 1db1005aed..247c59d68b 100644 --- a/KCLS/authority-control/backstage/README.adoc +++ b/KCLS/authority-control/backstage/README.adoc @@ -2,17 +2,18 @@ Perform steps as 'opensrf' +TODO: update me when backstage-agent.sh is done. + == Quarterly Export + Import == === Setup === [source,sh] -------------------------------------------------------------------- -export EXPORT_DATE=2016-10-01 # for example +export EXPORT_DATE=2016-10-01 export WORKING_DIR=/openils/var/data/authority-control/backstage/quarterly/$EXPORT_DATE export PGHOST=foo export PGPASSWORD=foo -export PGUSER=evergreen mkdir -p $WORKING_DIR -------------------------------------------------------------------- @@ -58,7 +59,7 @@ cd /home/opensrf/Evergreen/KCLS/backstage/ --export-date $EXPORT_DATE \ --zip-file $WORKING_DIR/ \ --working-dir $WORKING_DIR \ - --bib-collision-file bib-collisions.mrc \ + --reports-dir $REPORTS_DIR \ # TODO > $WORKING_DIR/process.log -------------------------------------------------------------------- diff --git a/KCLS/authority-control/backstage/backstage-agent.sh b/KCLS/authority-control/backstage/backstage-agent.sh new file mode 100755 index 0000000000..b9fa3af469 --- /dev/null +++ b/KCLS/authority-control/backstage/backstage-agent.sh @@ -0,0 +1,245 @@ 
+#!/bin/bash
+# ----------------------------------------------------------------
+# Backstage file processing utility functions.
+#
+# Environment variables BACKSTAGE_USER and BACKSTAGE_PASSWORD
+# must be set.
+#
+# PGHOST and PGPASSWORD may also need to be set.
+#
+# The perl helpers are invoked as ./process-backstage-files.pl and
+# ./export-bibs.pl, so the script must be started from the directory
+# that contains them.
+# ----------------------------------------------------------------
+BACKSTAGE_PUT_URL="ftp://ftp.bslw.com/in"
+# TODO: determine pickup location of quarterly result files
+#BACKSTAGE_GET_URL="ftp://ftp.bslw.com/out"
+# XXX: this is where monthly auth files are delivered.
+BACKSTAGE_GET_URL="http://mars.bslw.com/ftpfiles/NTG"
+WORKING_DIR_BASE="/openils/var/data/authority-control/backstage"
+REPORTS_DIR_BASE="/openils/var/web/standalone/backstage"
+
+# see bs_make_dirs()
+REPORTS_DIR=""
+WORKING_DIR=""
+
+SCRIPT_DIR=$PWD
+
+# --fail makes curl exit non-zero on an HTTP error response instead of
+# saving the server's error page under the requested file name.
+# $CURL is expanded unquoted on purpose so its options word-split.
+CURL="curl --fail --silent --show-error --user $BACKSTAGE_USER:$BACKSTAGE_PASSWORD"
+
+CUR_MONTH=$(date +'%m')
+CUR_YEAR2=$(date +'%y')
+CUR_YEAR4=$(date +'%Y')
+CUR_QTR=$(( ($(date +%-m)-1)/3+1 ))
+PREV_QTR_START_DATE=""
+PREV_QTR_END_DATE=""
+
+# Print the arguments to STDERR and exit the script with status 1.
+function die { echo "$@" 1>&2 ; exit 1; } # thanks, internet.
+
+# Confirm Backstage username and password are provided.
+# A variable that is set but empty is treated the same as unset.
+function bs_check_creds {
+
+    if [ -z "$BACKSTAGE_USER" ] || [ -z "$BACKSTAGE_PASSWORD" ]; then
+        die "ENV variables BACKSTAGE_USER and BACKSTAGE_PASSWORD required."
+    fi
+
+    echo "Connecting to backstage as user $BACKSTAGE_USER"
+}
+
+# Fetch a file from the Backstage FTP server.
+# FILE_NAME is the name of the file on the Backstage server -- no path.
+# DEST_DIR is the (writable) local directory the file is saved into.
+# A non-empty local copy is reused instead of being fetched again.
+function bs_get_file {
+    FILE_NAME="$1"
+    DEST_DIR="$2"
+
+    if [ -z "$FILE_NAME" ] || [ -z "$DEST_DIR" ]; then
+        die "bs_get_file() requires file name and destination directory"
+    fi
+
+    [ ! -w "$DEST_DIR" ] && \
+        die "Destination directory is not writable: $DEST_DIR"
+
+    bs_check_creds;
+
+    # curl doesn't have an output directory option, so go there,
+    # get the file, then return.
+    cd "$DEST_DIR" || die "Cannot enter destination directory: $DEST_DIR"
+
+    # -s instead of -f: an empty file is not a usable download.
+    if [ -s "$FILE_NAME" ]; then
+        echo "Backstage file already retrieved: $FILE_NAME"
+
+    else
+        echo "Fetching Backstage file $FILE_NAME"
+
+        if ! $CURL -O "$BACKSTAGE_GET_URL/$FILE_NAME"; then
+            # Remove any partial download so the next run retries
+            # instead of reporting the file as already retrieved.
+            rm -f "$FILE_NAME"
+            die "curl failed to retrieve file: $BACKSTAGE_GET_URL/$FILE_NAME"
+        fi
+    fi;
+
+    cd "$SCRIPT_DIR" || die "Cannot return to script directory: $SCRIPT_DIR"
+}
+
+# Put a file onto the Backstage FTP server.
+# FILE_NAME is the local file name.  May be relative to PWD or path-qualified.
+function bs_put_file {
+    FILE_NAME="$1"
+
+    [ -z "$FILE_NAME" ] && \
+        die "bs_put_file() requires a file name"
+
+    [ ! -r "$FILE_NAME" ] && \
+        die "File to send is not readable: $FILE_NAME"
+
+    bs_check_creds;
+
+    echo "Putting Backstage file: $(ls -l "$FILE_NAME")"
+
+    if ! $CURL -T "$FILE_NAME" "$BACKSTAGE_PUT_URL/"; then
+        die "curl failed to send file: $BACKSTAGE_PUT_URL/$FILE_NAME"
+    fi
+}
+
+
+# Fetch the monthly authority update file and process the results.
+# The file name is derived from the current 2-digit year and month.
+# STDOUT of the processor is written to $WORKING_DIR/process.log.
+function bs_import_monthly_auths {
+    bs_make_dirs "monthly"
+
+    FILE="NTG${CUR_YEAR2}${CUR_MONTH}N.zip"
+
+    bs_get_file "$FILE" "$WORKING_DIR"
+
+    echo "Processing file $WORKING_DIR/$FILE ..."
+
+    perl ./process-backstage-files.pl \
+        --verbose \
+        --auth-only \
+        --zip-file "$WORKING_DIR/$FILE" \
+        --working-dir "$WORKING_DIR" \
+        --reports-dir "$REPORTS_DIR" \
+        > "$WORKING_DIR/process.log"
+}
+
+# Fetch the quarterly result file and process it, using the export
+# date recorded in $WORKING_DIR/EXPORT_DATE by bs_export_qtrly_bibs().
+function bs_import_qtrly_results {
+    bs_set_qtr_dates;
+    bs_make_dirs "quarterly"
+
+    # Only read the marker file when it exists, so a missing file
+    # produces the message below rather than a raw 'cat' error.
+    EXPORT_DATE=""
+    if [ -r "$WORKING_DIR/EXPORT_DATE" ]; then
+        EXPORT_DATE="$(cat "$WORKING_DIR/EXPORT_DATE")"
+    fi
+
+    [ -z "$EXPORT_DATE" ] && \
+        die "No quarterly export data found at $WORKING_DIR/EXPORT_DATE"
+
+    # TODO file name?
+    FILE="NTG${CUR_YEAR2}${CUR_MONTH}N.zip"
+
+    bs_get_file "$FILE" "$WORKING_DIR"
+
+    echo "Processing file $WORKING_DIR/$FILE ..."
+
+    perl ./process-backstage-files.pl \
+        --verbose \
+        --export-date "$EXPORT_DATE" \
+        --zip-file "$WORKING_DIR/$FILE" \
+        --working-dir "$WORKING_DIR" \
+        --reports-dir "$REPORTS_DIR" \
+        > "$WORKING_DIR/process.log"
+
+}
+
+# Export the previous quarter's bib records to a MARC file, send the
+# file to Backstage, and record today's date as the export date.
+function bs_export_qtrly_bibs {
+    bs_set_qtr_dates;
+    bs_make_dirs "quarterly"
+
+    EXPORT_FILE="$WORKING_DIR/bib-export-qtrly.$CUR_YEAR4-$CUR_MONTH.mrc"
+
+    perl ./export-bibs.pl \
+        --start-date "$PREV_QTR_START_DATE" \
+        --end-date "$PREV_QTR_END_DATE" \
+        --out-file "$EXPORT_FILE"
+
+    if [ ! -f "$EXPORT_FILE" ]; then
+        die "No MARC export file was created at $EXPORT_FILE"
+    fi
+
+    # -s tests the byte size directly; the allocated block count
+    # reported by stat is not a reliable emptiness check.
+    if [ ! -s "$EXPORT_FILE" ]; then
+        die "Empty bib export file created"
+    fi
+
+    bs_put_file "$EXPORT_FILE"
+
+    # Put a file into the working directory with the export
+    # date so the importer can refer to it later.
+    date +'%F' > "$WORKING_DIR/EXPORT_DATE"
+}
+
+
+# Sets the start and end dates of the previous quarter in
+# PREV_QTR_START_DATE and PREV_QTR_END_DATE (YYYY-MM-DD).
+function bs_set_qtr_dates {
+
+    YEAR=$CUR_YEAR4
+
+    case "$CUR_QTR" in
+        1)
+            # Processing records for Q4 of the previous year.
+            YEAR=$((CUR_YEAR4 - 1))
+            START_MONTH=10
+            END_DAY=31
+            ;;
+        3)
+            START_MONTH="04"
+            END_DAY=30
+            ;;
+        4)
+            START_MONTH="07"
+            END_DAY=30
+            ;;
+        *)
+            START_MONTH="01"
+            END_DAY=31
+            ;;
+    esac
+
+    # 10# forces base 10 so the zero-padded month is never read as octal.
+    END_MONTH=$((10#$START_MONTH + 2))
+    PREV_QTR_START_DATE="$YEAR-$START_MONTH-01"
+    PREV_QTR_END_DATE="$YEAR-$(printf '%02d' "$END_MONTH")-$END_DAY"
+
+    echo "Prev quarter dates: $PREV_QTR_START_DATE..$PREV_QTR_END_DATE"
+}
+
+
+# Create working and report files directories.
+# Sets the globals WORKING_DIR and REPORTS_DIR to
+# <base>/<TYPE>/<YYYY>-<MM> for the current year and month.
+function bs_make_dirs {
+    TYPE="$1" # monthly, quarterly
+
+    WORKING_DIR="$WORKING_DIR_BASE/$TYPE/$CUR_YEAR4-$CUR_MONTH"
+    REPORTS_DIR="$REPORTS_DIR_BASE/$TYPE/$CUR_YEAR4-$CUR_MONTH"
+
+    echo "Creating working directory: $WORKING_DIR"
+
+    mkdir -p "$WORKING_DIR"
+
+    if [ ! -w "$WORKING_DIR" ]; then
+        die "Working directory is not writeable: $WORKING_DIR"
+    fi
+
+    echo "Creating reports directory: $REPORTS_DIR"
+
+    mkdir -p "$REPORTS_DIR"
+
+    if [ ! -w "$REPORTS_DIR" ]; then
+        die "Reports directory is not writeable: $REPORTS_DIR"
+    fi
+}
+
+function usage { + cat < \$marc_file, 'zip-file=s' => \$zip_file, 'export-date=s' => \$export_date, + 'auth-only' => \$auth_only, 'working-dir=s' => \$working_dir, + 'reports-dir=s' => \$reports_dir, 'verbose' => \$verbose, 'help' => \$help ); @@ -72,6 +76,11 @@ Options specially when ingesting bib records produced by Backstage to avoid losing change made by staff since the export. + --auth-only + Forces the script to ignore any bib files it's asked to process. + This also prevents the script from dying when no --export-date is + provided, since it only affects bib records. + --file Full path to a single bib or authority MARC file. @@ -90,11 +99,15 @@ $KU->verbose($verbose); $KU->syslog_ident('BACKSTAGE'); $KU->announce('ERR', "required: --export-date YYYY-MM-DD", 1) - unless $export_date && $export_date =~ /^\d{4}-\d{2}-\d{2}$/; + unless $auth_only || + ($export_date && $export_date =~ /^\d{4}-\d{2}-\d{2}$/); $KU->announce('ERR', "--marc-file or --zip-file required", 1) unless ($marc_file || $zip_file); +$KU->announce('ERR', "--reports-dir is not writeable", 1) + if $reports_dir && ! -w $reports_dir; + # Log every occurrence of each event type. $log_mod = 1 if $verbose; @@ -115,19 +128,37 @@ sub process_zip_file { # Start by locating the MARC files in the ZIP file # All of the MARC files end in .UTF8 or MRC. 
- for my $member ($zip->membersMatching('.*(\.UTF8|\.MRC)')) { + for my $member ($zip->members) { my $basename = basename($member->fileName()); - $KU->announce('INFO', "Extracting file $basename"); + if ($basename =~ /(\.UTF8|\.MRC)$/) { + $KU->announce('INFO', "Processing MARC file $basename"); - my $local_file = "$working_dir/$basename"; + my $local_file = "$working_dir/$basename"; - $KU->announce('ERR', "Unable to extract to file: $local_file", 1) - unless $member->extractToFileNamed($local_file) == AZ_OK; + $KU->announce('ERR', "Unable to extract to file: $local_file", 1) + unless $member->extractToFileNamed($local_file) == AZ_OK; + + if ($basename =~ /BIB/) { + if ($auth_only) { + $KU->announce('WARNING', "Processing as --auth-only. ". + "Skipping bib file $local_file."); + } else { + push(@{$marc_files{bib}}, $local_file); + } + } else { + push(@{$marc_files{auth}}, $local_file); + } - my $key = ($basename =~ /BIB/) ? 'bib' : 'auth'; - push(@{$marc_files{$key}}, $local_file); + } elsif ($reports_dir) { + $KU->announce('INFO', "Copying file to reports dir $basename"); + + my $local_file = "$reports_dir/$basename"; + + $KU->announce('ERR', "Unable to extract to file: $local_file", 1) + unless $member->extractToFileNamed($local_file) == AZ_OK; + } } # Then process bib files first, followed by authority files. diff --git a/KCLS/utility-scripts/CRONTAB b/KCLS/utility-scripts/CRONTAB index ede59bb958..4cad84870e 100644 --- a/KCLS/utility-scripts/CRONTAB +++ b/KCLS/utility-scripts/CRONTAB @@ -14,6 +14,8 @@ PGUSER = evergreen PGDATABASE = evergreen # change for cluster install PGHOST = localhost +BACKSTAGE_USER = BSUSER +BACKSTAGE_PASSWORD = BSPASS # Uncomment on production to generate email alerts for certain actions # EG_UTIL_NOTIFY = 1 -- 2.11.0