#!/bin/bash
#
# Copyright (C) 2014 Georgia Public Library Service
# Chris Sharp
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# A program for automating OCLC export and FTP upload.
#
# Provenance: oclc/oclc_export_and_ftp.sh as added in contrib/pines.git
# commit 08a45b383efa2cc21b34233c41193763de3aa66f
# ("Adding monthly OCLC export and FTP script", Chris Sharp, 2014-11-27).
#
# Overview of a run:
#   1. Mail a "begun" notice to $ADMIN_EMAIL.
#   2. Make sure the working directories exist.
#   3. Run $SQL_FILE through psql to list bib ids changed since the last
#      export date (prompting for a date when none has been recorded).
#   4. If the list is longer than $CHUNKSIZE, split it and export each chunk
#      with its own LABEL file (upload is manual in that case); otherwise
#      export one DATA/LABEL pair and upload both over FTP.
#   5. Mail a "completed" notice.

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# The script must be started from the directory holding get_ids.sql and
# marccount.pl; all other paths are derived from it.
WORKDIR="$PWD"
DATE=$(date +%y%m%d)
DATE_LONG=$(date +%Y%m%d)
PSQL="/usr/bin/psql"
PSQL_USER="mydbuser"
SQL_FILE="$WORKDIR/get_ids.sql"
DB_HOST="mydbhost"
DB_NAME="mydbname"
OUTDIR="$WORKDIR/bib_ids"
OUTFILE="bib_ids_$(date +%F).out"
# Absolute path so the marker is read and written in the same place no
# matter which directory a function has changed into.
LAST_EXPORT_FILE="$WORKDIR/last_export_date"
BIBDATE="${BIBDATE:-}"
if [[ -e "$LAST_EXPORT_FILE" ]]; then
  BIBDATE=$(cat "$LAST_EXPORT_FILE")
fi
EXPORT="DATA.D$DATE"
LABEL="LABEL.D$DATE"
PROJECT_ID="myprojectid"
OCLC_SYMBOL="myoclcsymbol"
EXPORT_DIR="$WORKDIR/out"
MARC_COUNT="$WORKDIR/marccount.pl"
CHUNKSIZE=50000
SPLIT_BIB_DIR="$OUTDIR/split"
FTP_HOST="my.ftp.host"
FTP_USER="myftpuser"
FTP_PASS="myftppass"
FTP_PATH="myftppath"
ADMIN_EMAIL="admin@example.org"

# Print a message on stderr and abort the run.
die () {
  echo "$*" >&2
  exit 1
}

# Do we have the required directories? If not, create them.
CheckEnv () {
  local dir
  for dir in "$OUTDIR" "$EXPORT_DIR"; do
    if [[ ! -d "$dir" ]]; then
      echo "Required directory $dir not found. Creating..."
      mkdir -p "$dir" || die "Could not create $dir."
    fi
  done
}

# Write the list of bib ids to export into $OUTDIR/$OUTFILE.
# If we have run this before, the date in the last_export_date file is used;
# otherwise the operator is prompted for a start date (YYYY-MM-DD).
CreateBibList () {
  local date_re='^20[0-9][0-9]-[0-1][0-9]-[0-3][0-9]$'

  if [[ -z "$BIBDATE" ]]; then
    read -r -p "No last export date known. Please provide the date at which you want to begin export (YYYY-MM-DD): " BIBDATE
    if [[ ! "$BIBDATE" =~ $date_re ]]; then
      die "$BIBDATE is not a valid date. Please start over."
    fi
  fi

  # The date is handed to the SQL file as the psql variable :bibdate,
  # already wrapped in single quotes.
  "$PSQL" -U "$PSQL_USER" -h "$DB_HOST" -A -t -o "$OUTDIR/$OUTFILE" \
    -v bibdate="'$BIBDATE'" -f "$SQL_FILE" "$DB_NAME" \
    || die "psql failed while building $OUTDIR/$OUTFILE."
  [[ -f "$OUTDIR/$OUTFILE" ]] || die "psql did not produce $OUTDIR/$OUTFILE."
}

# Branch here: if we have an $OUTFILE of under $CHUNKSIZE bib ids, just treat
# it as a single file.
ExportBibsSingleFile () {
  /openils/bin/marc_export --items --encoding UTF-8 \
    < "$OUTDIR/$OUTFILE" > "$EXPORT_DIR/$EXPORT" \
    || die "marc_export failed; last export date left unchanged."
  # Only advance the marker once the export has actually succeeded.
  date +%F > "$LAST_EXPORT_FILE"
}

# Store the number of records in the single export file in $RECORDCOUNT.
CountBibsSingleFile () {
  RECORDCOUNT=$("$MARC_COUNT" "$EXPORT_DIR/$EXPORT")
}

# Write the OCLC LABEL file that accompanies the single export file.
CreateLabelSingleFile () {
  cat << EOF > "$EXPORT_DIR/$LABEL"
DAT ${DATE_LONG}000000.0
RBF $RECORDCOUNT
DSN $EXPORT
ORS $OCLC_SYMBOL
FDI $PROJECT_ID
EOF
}

# Upload the single DATA/LABEL pair to the FTP server.
FTPSingleFile () {
  # "put" uses bare file names, so we must really be inside $EXPORT_DIR.
  cd "$EXPORT_DIR" || die "Could not change to $EXPORT_DIR."
  ftp -inv "$FTP_HOST" << EOF
user $FTP_USER $FTP_PASS
cd '$FTP_PATH'
binary
put $EXPORT
put $LABEL
EOF
  cd "$WORKDIR" || die "Could not change to $WORKDIR."
}

# Otherwise, we need to split the file into $CHUNKSIZE chunks and process
# them in a loop.
SplitIDList () {
  if [[ ! -d "$SPLIT_BIB_DIR" ]]; then
    echo "Creating $SPLIT_BIB_DIR."
    mkdir -p "$SPLIT_BIB_DIR"
  fi
  cd "$SPLIT_BIB_DIR" || die "Could not change to $SPLIT_BIB_DIR."

  # Drop chunks left by an earlier run on the same day so a shorter list
  # cannot leave stale trailing chunks behind.
  rm -f -- "bib_ids_${DATE}"*

  BIBCOUNT=$(wc -l < "$OUTDIR/$OUTFILE")
  split -l "$CHUNKSIZE" "$OUTDIR/$OUTFILE" "bib_ids_$DATE" \
    || die "Could not split $OUTDIR/$OUTFILE."
  echo "Split $BIBCOUNT records into chunks of $CHUNKSIZE in $SPLIT_BIB_DIR."
}

# Export every chunk produced by SplitIDList and write a LABEL file for each.
# NOTE(review): output goes to $OUTDIR (not $EXPORT_DIR as in the single-file
# path); kept as in the original so files stay where operators expect them.
ExportBibsMultiFile () {
  local i=1
  local bibfile datafile

  echo "Beginning Bib Export."
  cd "$SPLIT_BIB_DIR" || die "Could not change to $SPLIT_BIB_DIR."

  # Only today's chunks: a bare bib_ids_* would also re-export chunks that
  # previous runs left in this directory.
  for bibfile in "bib_ids_${DATE}"*; do
    [[ -e "$bibfile" ]] || continue
    datafile="DATA.D$DATE.FILE$i"
    echo "Processing $bibfile..."
    /openils/bin/marc_export --items --encoding UTF-8 \
      < "$bibfile" > "$OUTDIR/$datafile" \
      || die "marc_export failed on $bibfile; last export date left unchanged."
    RECORDCOUNT=$("$MARC_COUNT" "$OUTDIR/$datafile")
    echo "$datafile contains $RECORDCOUNT records"
    # DAT uses the same six-digit time field as the single-file label.
    cat << EOF > "$OUTDIR/LABEL.D$DATE.FILE$i"
DAT ${DATE_LONG}000000.0
RBF $RECORDCOUNT
DSN $datafile
ORS $OCLC_SYMBOL
FDI $PROJECT_ID
EOF
    i=$((i + 1))
  done
  date +%F > "$LAST_EXPORT_FILE"
}

# Placeholder for uploading a multi-file export.
FTPMultiFile () {
  # FIXME: It's actually unlikely that a monthly export will exceed 50K
  # but if it does, and it becomes a burden to do this manually, we
  # can add something here to automate multiple file FTP. ;-)
  #
  # A function body needs at least one command to be valid Bash.
  echo "FTPMultiFile is not implemented; upload the files in $OUTDIR manually." >&2
  return 1
}

echo "OCLC bib export began at $(date)." | mutt -s "OCLC Export Begun" "$ADMIN_EMAIL"
CheckEnv
CreateBibList
if [[ "$(wc -l < "$OUTDIR/$OUTFILE")" -gt "$CHUNKSIZE" ]]; then
  SplitIDList
  ExportBibsMultiFile
  # FTPMultiFile
  echo "Multi-file export written to $OUTDIR; these files are not uploaded automatically."
else
  ExportBibsSingleFile
  CountBibsSingleFile
  CreateLabelSingleFile
  FTPSingleFile
fi
echo "OCLC bib export completed at $(date)." | mutt -s "OCLC Export Completed" "$ADMIN_EMAIL"