#!/bin/bash
#
-# Copyright (C) 2014 Georgia Public Library Service
+# Copyright (C) 2014-2017 Georgia Public Library Service
# Chris Sharp <csharp@georgialibraries.org>
#
# This program is free software: you can redistribute it and/or modify
DATE=`date +%y%m%d`
DATE_LONG=`date +%Y%m%d`
PSQL="/usr/bin/psql"
-PSQL_USER="mydbuser"
-DB_HOST="mydbhost"
-DB_NAME="mydbname"
+PSQL_USER="evergreen"
+DB_HOST="db03"
+DB_NAME="evergreen"
OUTDIR="$WORKDIR/bib_ids"
OUTFILE="bib_ids_`date +%F`.out"
if [ -e $WORKDIR/last_export_date ]; then
-BIBDATE=`cat $WORKDIR/last_export_date`
+ BIBDATE=`cat $WORKDIR/last_export_date`
fi
-EXPORT="DATA.D$DATE"
-LABEL="LABEL.D$DATE"
-PROJECT_ID="myprojectid"
-OCLC_SYMBOL="myoclcsymbol"
+EXPORT="1011413.P_64N.bibs${DATE}.mrc"
EXPORT_DIR="$WORKDIR/out"
-MARC_COUNT="$WORKDIR/marccount.pl"
-CHUNKSIZE=50000
-SPLIT_BIB_DIR="$OUTDIR/split"
-FTP_HOST="my.ftp.host"
-FTP_USER="myftpusr"
-FTP_PASS="myftppass"
-FTP_PATH="myftpfilepath"
-ADMIN_EMAIL="admins@myactualdomain.org"
+SCP="/usr/bin/scp"
+SCP_USER="p_64n"
+SCP_HOST="ftp2.oclc.org"
+SCP_PATH="/xfer/metacoll/in/bib"
+ADMIN_EMAIL="pines-admins@georgialibraries.org"
# Do we have the required directories? If not, create them
CheckEnv () {
for dir in $OUTDIR $EXPORT_DIR; do
- if [ ! -e "$dir" ] && [ ! -d "$dir" ]; then
- echo "Required directory $dir not found. Creating..."
- mkdir "$dir"
- fi
+ if [ ! -e "$dir" ] && [ ! -d "$dir" ]; then
+ echo "Required directory $dir not found. Creating..."
+ mkdir "$dir"
+ fi
done
}
CreateBibList () {
if [ -z $BIBDATE ]; then
read -p "No last export date known. Please provide the date at which you want to begin export (YYYY-MM-DD): " BIBDATE
+ # sanity check: does the input at least look like a YYYY-MM-DD date?
echo "$BIBDATE" | egrep '^20[0-9][0-9]-[0-1][0-9]-[0-3][0-9]$' > /dev/null
if [ $? -ne 0 ]; then
echo "$BIBDATE is not a valid date. Please start over."
exit 1;
fi
fi
+# This query grabs ids for bibs with items created since $BIBDATE
read -d '' SQL <<EOF
select distinct bre.id
from biblio.record_entry bre
join asset.call_number acn on (acn.record = bre.id)
join asset.copy acp on (acp.call_number = acn.id
and not acp.deleted
and acp.create_date::date > '$BIBDATE'
)
- where bre.id > 0 -- we don't want -1
+ where bre.id > 0 -- we don't want -1
and not bre.deleted
- -- exclude too-large records
- and bre.id not in (`grep -v ^# toolong | sed ':a;N;$!ba;s/\n/, /g'`)"
- order by id;"
+ -- we used to exclude too-large records here, but MARCXML has no size limits
+ order by id;
EOF
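+# -A (unaligned) and -t (tuples only) strip headers and alignment so $OUTFILE is one bare bib id per line; -o writes it to $OUTDIR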
$PSQL -U $PSQL_USER -h $DB_HOST -A -t -o $OUTDIR/$OUTFILE -c "$SQL" $DB_NAME
}
-# Branch here: if we have an $OUTFILE of under $CHUNKSIZE bib ids, just treat it as a single file
-ExportBibsSingleFile () {
-cat $OUTDIR/$OUTFILE | /openils/bin/marc_export --items --encoding UTF-8 > $EXPORT_DIR/$EXPORT
+# Create the export in MARCXML and record the date for use in the next run
+ExportBibs () {
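+# marc_export reads bib ids on stdin; --items embeds copy/holdings data and --format XML produces MARCXML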
+cat $OUTDIR/$OUTFILE | /openils/bin/marc_export --items --encoding UTF-8 --format XML > $EXPORT_DIR/$EXPORT
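+# Record today's date as the starting point for the next incremental run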
date +%F > $WORKDIR/last_export_date
}
-CountBibsSingleFile () {
-RECORDCOUNT=`$MARC_COUNT $EXPORT_DIR/$EXPORT`
-}
-
-CreateLabelSingleFile () {
-cat << EOF > $EXPORT_DIR/$LABEL
-DAT ${DATE_LONG}000000.0
-RBF $RECORDCOUNT
-DSN $EXPORT
-ORS $OCLC_SYMBOL
-FDI $PROJECT_ID
-EOF
-}
-
-FTPSingleFile () {
-cd $EXPORT_DIR
-ftp -inv $FTP_HOST << EOF
-user $FTP_USER $FTP_PASS
-cd '$FTP_PATH'
-binary
-put $EXPORT
-put $LABEL
-EOF
-cd $WORKDIR
-}
-
-# Otherwise, we need to split the file into $CHUNKSIZE chunks and process them in a loop
-SplitIDList () {
-if [ ! -d "$SPLIT_BIB_DIR" ]; then
- echo "Creating $SPLIT_BIB_DIR."
- mkdir "$SPLIT_BIB_DIR"
-fi
-cd "$SPLIT_BIB_DIR" || {
- echo "Could not change to $SPLIT_BIB_DIR."
- exit 1;
-}
-BIBCOUNT=`wc -l $OUTDIR/$OUTFILE | awk '{print $1}'`;
-split -l "$CHUNKSIZE" "$OUTDIR"/"$OUTFILE" "bib_ids_$DATE"
-echo "Split $BIBCOUNT records into chunks of $CHUNKSIZE in $SPLIT_BIB_DIR."
-}
-
-ExportBibsMultiFile () {
-echo "Beginning Bib Export."
-cd "$SPLIT_BIB_DIR" || {
- echo "Could not change to $SPLIT_BIB_DIR."
- exit 1;
-}
-i="1"
-for bibfile in `ls bib_ids_*`; do
- echo "Processing $bibfile..."
- cat $bibfile | /openils/bin/marc_export --items --encoding UTF-8 > $EXPORT_DIR/DATA.D$DATE.FILE$i
- RECORDCOUNT=`$MARC_COUNT $EXPORT_DIR/DATA.D$DATE.FILE$i`
- echo "DATA.D$DATE.FILE$i contains $RECORDCOUNT records"
- cat << EOF > $EXPORT_DIR/LABEL.D$DATE.FILE$i
-DAT ${DATE_LONG}0000000.0
-RBF $RECORDCOUNT
-DSN DATA.D$DATE.FILE$i
-ORS $OCLC_SYMBOL
-FDI $PROJECT_ID
-EOF
- i=$[$i+1] # set up counter here
-done
-date +%F > $WORKDIR/last_export_date
-}
-
-FTPMultiFile () {
-# FIXME: It's actually unlikely that a monthly export will exceed 50K
-# but if it does, and it becomes a burden to do this manually, we
-# can add something here to automate multiple file FTP. ;-)
-echo "FTP multifile support is not yet implemented."
+# We use SCP rather than SFTP; the remote server must already have our public key installed so this runs non-interactively
+SCPFile () {
+$SCP $EXPORT_DIR/$EXPORT $SCP_USER@$SCP_HOST:$SCP_PATH
}
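+# Main sequence: notify admins, check directories, build the bib id list, export, and transfer to OCLC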
echo "OCLC bib export began at `date`." | mutt -s "OCLC Export Begun" $ADMIN_EMAIL
CheckEnv
CreateBibList
-if [ `wc -l $OUTDIR/$OUTFILE | awk '{print $1}'` -gt "$CHUNKSIZE" ]; then
- SplitIDList
- ExportBibsMultiFile
-# FTPMultiFile
-else
- ExportBibsSingleFile
- CountBibsSingleFile
- CreateLabelSingleFile
- FTPSingleFile
-fi
+ExportBibs
+SCPFile
echo "OCLC bib export completed at `date`." | mutt -s "OCLC Export Completed" $ADMIN_EMAIL