From: Chris Sharp
Date: Wed, 8 Feb 2017 16:48:13 +0000 (-0500)
Subject: updating the OCLC export script to work with WorldShare
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=b8e59cb25a83c4909cc43a87b2eef940878bea96;p=contrib%2Fpines.git

updating the OCLC export script to work with WorldShare
---

diff --git a/oclc/oclc_export_and_ftp.sh b/oclc/oclc_export_and_ftp.sh
index 742bc17..564dcc2 100755
--- a/oclc/oclc_export_and_ftp.sh
+++ b/oclc/oclc_export_and_ftp.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2014 Georgia Public Library Service
+# Copyright (C) 2014-2017 Georgia Public Library Service
 # Chris Sharp
 #
 # This program is free software: you can redistribute it and/or modify
@@ -22,36 +22,30 @@ WORKDIR="$PWD"
 DATE=`date +%y%m%d`
 DATE_LONG=`date +%Y%m%d`
 PSQL="/usr/bin/psql"
-PSQL_USER="mydbuser"
-DB_HOST="mydbhost"
-DB_NAME="mydbname"
+PSQL_USER="evergreen"
+DB_HOST="db03"
+DB_NAME="evergreen"
 OUTDIR="$WORKDIR/bib_ids"
 OUTFILE="bib_ids_`date +%F`.out"
 if [ -e $WORKDIR/last_export_date ]; then
-BIBDATE=`cat $WORKDIR/last_export_date`
+ BIBDATE=`cat $WORKDIR/last_export_date`
 fi
-EXPORT="DATA.D$DATE"
-LABEL="LABEL.D$DATE"
-PROJECT_ID="myprojectid"
-OCLC_SYMBOL="myoclcsymbol"
+EXPORT="1011413.P_64N.bibs${DATE}.mrc"
 EXPORT_DIR="$WORKDIR/out"
-MARC_COUNT="$WORKDIR/marccount.pl"
-CHUNKSIZE=50000
-SPLIT_BIB_DIR="$OUTDIR/split"
-FTP_HOST="my.ftp.host"
-FTP_USER="myftpusr"
-FTP_PASS="myftppass"
-FTP_PATH="myftpfilepath"
-ADMIN_EMAIL="admins@myactualdomain.org"
+SCP="/usr/bin/scp"
+SCP_USER="p_64n"
+SCP_HOST="ftp2.oclc.org"
+SCP_PATH="/xfer/metacoll/in/bib"
+ADMIN_EMAIL="pines-admins@georgialibraries.org"
 # Do we have the required directories? If not, create them
 CheckEnv () {
 for dir in $OUTDIR $EXPORT_DIR; do
- if [ ! -e "$dir" ] && [ ! -d "$dir" ]; then
- echo "Required directory $dir not found. Creating..."
- mkdir "$dir"
- fi
+ if [ ! -e "$dir" ] && [ ! -d "$dir" ]; then
+ echo "Required directory $dir not found. Creating..."
+ mkdir "$dir"
+ fi
 done
 }
@@ -59,12 +53,14 @@ done
 CreateBibList () {
 if [ -z $BIBDATE ]; then
 read -p "No last export date known. Please provide the date at which you want to begin export (YYYY-MM-DD): " BIBDATE
+ # check to see if the input kinda looks like a valid date
 echo "$BIBDATE" | egrep '^20[0-9][0-9]-[0-1][0-9]-[0-3][0-9]$' > /dev/null
 if [ $? -ne 0 ]; then
 echo "$BIBDATE is not a valid date. Please start over."
 exit 1;
 fi
 fi
+# This query grabs ids for bibs with items created since $BIBDATE
 read -d '' SQL < '$BIBDATE' )
- where bre.id > 0 -- we don't want -1
+ where bre.id > 0 -- we don't want -1
 and not bre.deleted
- -- exclude too-large records
- and bre.id not in (`grep -v ^# toolong | sed ':a;N;$!ba;s/\n/, /g'`)"
- order by id;"
+ -- we used to exclude too-large records here, but MARCXML has no size limits
+ order by id;
 EOF
 $PSQL -U $PSQL_USER -h $DB_HOST -A -t -o $OUTDIR/$OUTFILE -c "$SQL" $DB_NAME
 }
-# Branch here: if we have an $OUTFILE of under $CHUNKSIZE bib ids, just treat it as a single file
-ExportBibsSingleFile () {
-cat $OUTDIR/$OUTFILE | /openils/bin/marc_export --items --encoding UTF-8 > $EXPORT_DIR/$EXPORT
+# Create the export in MARCXML and record the date for use in the next run
+ExportBibs () {
+cat $OUTDIR/$OUTFILE | /openils/bin/marc_export --items --encoding UTF-8 --format XML > $EXPORT_DIR/$EXPORT
 date +%F > $WORKDIR/last_export_date
 }
-CountBibsSingleFile () {
-RECORDCOUNT=`$MARC_COUNT $EXPORT_DIR/$EXPORT`
-}
-
-CreateLabelSingleFile () {
-cat << EOF > $EXPORT_DIR/$LABEL
-DAT ${DATE_LONG}000000.0
-RBF $RECORDCOUNT
-DSN $EXPORT
-ORS $OCLC_SYMBOL
-FDI $PROJECT_ID
-EOF
-}
-
-FTPSingleFile () {
-cd $EXPORT_DIR
-ftp -inv $FTP_HOST << EOF
-user $FTP_USER $FTP_PASS
-cd '$FTP_PATH'
-binary
-put $EXPORT
-put $LABEL
-EOF
-cd $WORKDIR
-}
-
-# Otherwise, we need to split the file into $CHUNKSIZE chunks and process them in a loop
-SplitIDList () {
-if [ ! -d "$SPLIT_BIB_DIR" ]; then
- echo "Creating $SPLIT_BIB_DIR."
- mkdir "$SPLIT_BIB_DIR"
-fi
-cd "$SPLIT_BIB_DIR" || {
- echo "Could not change to $SPLIT_BIB_DIR."
- exit 1;
-}
-BIBCOUNT=`wc -l $OUTDIR/$OUTFILE | awk '{print $1}'`;
-split -l "$CHUNKSIZE" "$OUTDIR"/"$OUTFILE" "bib_ids_$DATE"
-echo "Split $BIBCOUNT records into chunks of $CHUNKSIZE in $SPLIT_BIB_DIR."
-}
-
-ExportBibsMultiFile () {
-echo "Beginning Bib Export."
-cd "$SPLIT_BIB_DIR" || {
- echo "Could not change to $SPLIT_BIB_DIR."
- exit 1;
-}
-i="1"
-for bibfile in `ls bib_ids_*`; do
- echo "Processing $bibfile..."
- cat $bibfile | /openils/bin/marc_export --items --encoding UTF-8 > $EXPORT_DIR/DATA.D$DATE.FILE$i
- RECORDCOUNT=`$MARC_COUNT $EXPORT_DIR/DATA.D$DATE.FILE$i`
- echo "DATA.D$DATE.FILE$i contains $RECORDCOUNT records"
- cat << EOF > $EXPORT_DIR/LABEL.D$DATE.FILE$i
-DAT ${DATE_LONG}0000000.0
-RBF $RECORDCOUNT
-DSN DATA.D$DATE.FILE$i
-ORS $OCLC_SYMBOL
-FDI $PROJECT_ID
-EOF
- i=$[$i+1] # set up counter here
-done
-date +%F > $WORKDIR/last_export_date
-}
-
-FTPMultiFile () {
-# FIXME: It's actually unlikely that a monthly export will exceed 50K
-# but if it does, and it becomes a burden to do this manually, we
-# can add something here to automate multiple file FTP. ;-)
-echo "FTP multifile support is not yet implemented."
+# We could use SFTP here, but SCP is better - remote server must have our key installed
+SCPFile () {
+$SCP $EXPORT_DIR/$EXPORT $SCP_USER@$SCP_HOST:$SCP_PATH
 }
 echo "OCLC bib export began at `date`." | mutt -s "OCLC Export Begun" $ADMIN_EMAIL
 CheckEnv
 CreateBibList
-if [ `wc -l $OUTDIR/$OUTFILE | awk '{print $1}'` -gt "$CHUNKSIZE" ]; then
- SplitIDList
- ExportBibsMultiFile
-# FTPMultiFile
-else
- ExportBibsSingleFile
- CountBibsSingleFile
- CreateLabelSingleFile
- FTPSingleFile
-fi
+ExportBibs
+SCPFile
 echo "OCLC bib export completed at `date`." | mutt -s "OCLC Export Completed" $ADMIN_EMAIL