From: Galen Charlton Date: Wed, 1 Aug 2012 19:21:44 +0000 (-0400) Subject: collectionHQ data extraction report X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=f842c545ff403f97e418d41413386352a2c58a67;p=contrib%2Fequinox.git collectionHQ data extraction report Scripts to produce a report of bibs and items from an Evergreen database for submission to collectionHQ. These scripts were sponsored by the King County Library System. Signed-off-by: Galen Charlton --- diff --git a/collectionHQ/README b/collectionHQ/README new file mode 100644 index 0000000..1a4659d --- /dev/null +++ b/collectionHQ/README @@ -0,0 +1,16 @@ +The scripts in this directory are used to produce +a periodic report in text format of bibs and holdings +in an Evergreen database for submission to collectionHQ. + +Installation and usage: + +[1] Run functions.sql in your (reporting) Evergreen database + to create the collectionHQ schema and a couple functions. +[2] Edit extract.sh, get_bibs.sql, and get_items.sql to + put in your library code and FTP credentials with collectionHQ and + email parameters for the notification email that extract.sh can + send. +[3] Run extract.sh manually or set up a periodic cronjob, for example: + +0 2 20 * * cd /home/esi/collectionHQ && ./extract.sh + diff --git a/collectionHQ/extract.sh b/collectionHQ/extract.sh new file mode 100755 index 0000000..1508aa3 --- /dev/null +++ b/collectionHQ/extract.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +LIBRARYNAME="LIBRARYCODE" # library code assigned by collectionHQ +DATE=`date +%Y%m%d` +FILE="$LIBRARYNAME""$DATE".DAT +FTPUSER="user" +FTPPASS="passwd" +FTPSITE="ftp.collectionhq.com" +EMAILFROM="you@example.org" +EMAILTO="thee@example.org" + +function get_data_from_sql { + echo The extract for $DATE has begun. | ./send-email.pl --from "$EMAILFROM" --to "$EMAILTO" --subject "collectionHQ extraction has begun" + date + echo Fetching bibs... + psql -A -t -U evergreen < get_bibs.sql 2>&1 | cut -c8- | perl -ne 'if (m/^[0-9]/) { print STDERR; } else { print; }' > bibs-$DATE.txt + date + echo Fetching items... + psql -A -t -U evergreen < get_items.sql 2>&1 | cut -c8- | perl -ne 'if (m/^[0-9]/) { print STDERR; } else { print; }' > items-$DATE.txt + date + echo done. +} + +function format_data { + echo "##HEADER##,##DAT##,##${DATE}##,##${LIBRARYNAME}##,,,##USA##" > $FILE + cat bibs-$DATE.txt >> $FILE + cat items-$DATE.txt >> $FILE + NUMBIBS=`wc -l bibs-$DATE.txt | cut -d' ' -f1` + NUMITEMS=`wc -l items-$DATE.txt | cut -d' ' -f1` + echo "##TRAILER##,$NUMBIBS,$NUMITEMS" >> $FILE +} + +function upload_data { + gzip --best $FILE + ftp -v -n $FTPSITE < '' THEN + RETURN '##' || value || '##'; + ELSE + RETURN ''; + END IF; + END; +$$ LANGUAGE PLPGSQL STRICT STABLE; + + +CREATE OR REPLACE FUNCTION collectionHQ.write_item_rows_to_stdout (TEXT) RETURNS TEXT AS $$ +-- Usage: SELECT collectionHQ.write_item_rows_to_stdout ('LIBRARYCODE'); + + DECLARE + item BIGINT; + authority_code ALIAS FOR $1; + lms_bib_id TEXT; + library_code TEXT; + bar_code TEXT; + last_use_date TEXT; + cumulative_use_total TEXT; + cumulative_use_current TEXT; + status TEXT; + date_added TEXT; + price TEXT; + purchase_code TEXT; + rotating_stock TEXT; + lib_supsel_tag TEXT; + gen_supsel_tag TEXT; + notes TEXT; + extract_date TEXT; + collection_code TEXT; + collection_code_level_2 TEXT; + filter_level_1 TEXT; + filter_level_2 TEXT; + filter_level_3 TEXT; + filter_level_4 TEXT; + isbn TEXT := ''; + output TEXT := ''; + arrived TIMESTAMPTZ; + num_rows INTEGER := 0; + + BEGIN + + FOR item IN + EXECUTE ('SELECT id FROM asset.copy WHERE NOT deleted ORDER BY id;') + LOOP + + EXECUTE ('SELECT cn.record FROM asset.call_number cn, asset.copy c WHERE c.call_number = cn.id AND c.id = ' || item || ';') INTO lms_bib_id; + EXECUTE (E'SELECT isbn[1] FROM reporter.materialized_simple_record r WHERE r.id = ' || lms_bib_id || ';') INTO isbn; + EXECUTE ('SELECT collectionHQ.attempt_price(price::TEXT) FROM asset.copy WHERE id = ' || item || ';') INTO price; + IF price IS NULL OR price = '' THEN + EXECUTE (E'SELECT collectionHQ.attempt_price(value) FROM metabib.real_full_rec WHERE record = ' || lms_bib_id || E' AND tag = \'020\' AND subfield = \'c\' LIMIT 1;') INTO price; + END IF; + EXECUTE (E'SELECT REPLACE(NOW()::DATE::TEXT, \'-\', \'\');') INTO extract_date; + EXECUTE ('SELECT ou.shortname FROM actor.org_unit ou, asset.copy c WHERE ou.id = c.circ_lib AND c.id = ' || item || ';') INTO library_code; + EXECUTE ('SELECT barcode FROM asset.copy WHERE id = ' || item || ';') INTO bar_code; + EXECUTE (E'SELECT REPLACE(xact_start::DATE::TEXT, \'-\', \'\') FROM action.circulation WHERE target_copy = ' || item || ' ORDER BY xact_start DESC LIMIT 1;') INTO last_use_date; + EXECUTE (E'SELECT circ_count FROM extend_reporter.full_circ_count WHERE id = ' || item || ';') INTO cumulative_use_total; + IF cumulative_use_total IS NULL THEN + cumulative_use_total := '0'; + END IF; + EXECUTE ('SELECT h.audit_time FROM asset.copy c, auditor.asset_copy_history h WHERE c.id = h.id AND c.circ_lib <> h.circ_lib AND c.id = ' || item || 'ORDER BY h.audit_time DESC LIMIT 1;') INTO arrived; + IF arrived IS NOT NULL THEN + EXECUTE('SELECT COUNT(*) FROM action.circulation WHERE target_copy = ' || item || ' AND xact_start > ' || quote_literal(arrived) || '::TIMESTAMPTZ;') INTO cumulative_use_current; + ELSE + cumulative_use_current := '0'; + END IF; + EXECUTE ('SELECT status FROM asset.copy WHERE id = ' || item || ';') INTO status; + EXECUTE (E'SELECT REPLACE(create_date::DATE::TEXT, \'-\', \'\') FROM asset.copy WHERE id = ' || item || ';') INTO date_added; + EXECUTE (E'SELECT CASE floating WHEN TRUE THEN \'Y\' ELSE NULL END FROM asset.copy WHERE id = ' || item || ';') INTO rotating_stock; + EXECUTE ('SELECT SUBSTRING(value FROM 1 FOR 100) FROM asset.copy_note WHERE owning_copy = ' || item || E' AND title ILIKE \'%collectionHQ%\' ORDER BY id LIMIT 1;') INTO notes; -- FIXME or we could compile and concatenate in another function + EXECUTE ('SELECT l.name FROM asset.copy c, asset.copy_location l WHERE c.location = l.id AND c.id = ' || item || ';') INTO collection_code; + EXECUTE ('SELECT v.label FROM asset.call_number v, asset.copy c WHERE v.id = c.call_number AND c.id = ' || item || ';') INTO filter_level_1; + + purchase_code := ''; -- FIXME do we want something else here? + lib_supsel_tag := ''; -- FIXME do we want something else here? + gen_supsel_tag := ''; -- FIXME do we want something else here? + collection_code_level_2 := ''; -- FIXME do we want something else here? + filter_level_2 := ''; -- FIXME do we want something else here? + filter_level_3 := ''; -- FIXME do we want something else here? + filter_level_4 := ''; -- FIXME do we want something else here? + + output := '##HOLD##,' + || lms_bib_id || ',' + || COALESCE(collectionHQ.quote(authority_code), '') || ',' + || COALESCE(collectionHQ.quote(library_code), '') || ',' + || COALESCE(collectionHQ.quote(bar_code), '') || ',' + || COALESCE(collectionHQ.quote(last_use_date), '') || ',' + || COALESCE(cumulative_use_total, '') || ',' + || COALESCE(cumulative_use_current, '') || ',' + || COALESCE(collectionHQ.quote(status), '') || ',' + || COALESCE(collectionHQ.quote(date_added), '') || ',' + || COALESCE(price, '') || ',' + || COALESCE(collectionHQ.quote(purchase_code), '') || ',' + || COALESCE(collectionHQ.quote(rotating_stock), '') || ',' + || COALESCE(collectionHQ.quote(lib_supsel_tag), '') || ',' + || COALESCE(collectionHQ.quote(gen_supsel_tag), '') || ',' + || COALESCE(collectionHQ.quote(notes), '') || ',' + || COALESCE(collectionHQ.quote(extract_date), '') || ',' + || COALESCE(collectionHQ.quote(collection_code), '') || ',' + || COALESCE(collectionHQ.quote(collection_code_level_2), '') || ',' + || COALESCE(collectionHQ.quote(filter_level_1), '') || ',' + || COALESCE(collectionHQ.quote(filter_level_2), '') || ',' + || COALESCE(collectionHQ.quote(filter_level_3), '') || ',' + || COALESCE(collectionHQ.quote(filter_level_4), '') || ',' + || COALESCE(collectionHQ.quote(isbn), ''); + + RAISE INFO '%', output; + + num_rows := num_rows + 1; + IF (num_rows::numeric % 10000.0 = 0.0) THEN RAISE INFO '% rows written', num_rows; END IF; + + END LOOP; + + RAISE INFO '% rows written in total.', num_rows; + + END; + +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION collectionHQ.write_bib_rows_to_stdout (TEXT) RETURNS TEXT AS $$ +-- Usage: SELECT collectionHQ.write_bib_rows_to_stdout('LIBRARYCODE'); + + DECLARE + library_service_code ALIAS FOR $1; + isbn TEXT := ''; + title TEXT := ''; + author TEXT := ''; + edition_num TEXT := ''; + publication_date TEXT := ''; + publisher TEXT := ''; + price TEXT := ''; + lms_item_type TEXT := ''; + class_num TEXT := ''; + extract_date TEXT := ''; + output TEXT := ''; + lms_bib_id BIGINT; + num_rows INTEGER := 0; + + BEGIN + + FOR lms_bib_id IN + EXECUTE ('SELECT id FROM biblio.record_entry;') + LOOP + + EXECUTE (E'SELECT isbn[1] FROM reporter.materialized_simple_record r WHERE r.id = ' || lms_bib_id || ';') INTO isbn; + EXECUTE ('SELECT SUBSTRING(title FROM 1 FOR 100) FROM reporter.materialized_simple_record r WHERE r.id = ' || lms_bib_id || ';') INTO title; + EXECUTE ('SELECT SUBSTRING(author FROM 1 FOR 50) FROM reporter.materialized_simple_record r WHERE r.id = ' || lms_bib_id || ';') INTO author; + EXECUTE (E'SELECT SUBSTRING(value FROM 1 FOR 20) FROM metabib.real_full_rec WHERE record = ' || lms_bib_id || E' AND tag = \'250\' AND subfield = \'a\' LIMIT 1;') INTO edition_num; + EXECUTE (E'SELECT collectionHQ.attempt_year(value) FROM metabib.real_full_rec WHERE record = ' || lms_bib_id || E' AND tag = \'260\' AND subfield = \'c\' LIMIT 1;') INTO publication_date; + EXECUTE (E'SELECT SUBSTRING(value FROM 1 FOR 100) FROM metabib.real_full_rec WHERE record = ' || lms_bib_id || E' AND tag = \'260\' AND subfield = \'b\' LIMIT 1;') INTO publisher; + EXECUTE (E'SELECT collectionHQ.attempt_price(value) FROM metabib.real_full_rec WHERE record = ' || lms_bib_id || E' AND tag = \'020\' AND subfield = \'c\' LIMIT 1;') INTO price; + EXECUTE ('SELECT circ_modifier FROM asset.copy c, asset.call_number cn WHERE cn.record = ' || lms_bib_id || ' AND cn.id = c.call_number AND NOT cn.deleted AND NOT c.deleted LIMIT 1;') INTO lms_item_type; + EXECUTE ('SELECT SUBSTRING(value FROM 1 FOR 20) FROM metabib.real_full_rec WHERE record = ' || lms_bib_id || E' AND tag = \'082\' AND subfield = \'a\' LIMIT 1;') INTO class_num; + EXECUTE (E'SELECT REPLACE(NOW()::DATE::TEXT, \'-\', \'\');') INTO extract_date; + + output := + '##BIB##,' + || lms_bib_id || ',' + || COALESCE(collectionHQ.quote(library_service_code), '') || ',' + || COALESCE(collectionHQ.quote(isbn), '') || ',' + || COALESCE(collectionHQ.quote(title), '') || ',' + || COALESCE(collectionHQ.quote(author), '') || ',' + || COALESCE(collectionHQ.quote(edition_num), '') || ',' + || COALESCE(collectionHQ.quote(publication_date), '') || ',' + || COALESCE(collectionHQ.quote(publisher), '') || ',' + || COALESCE(price, '') || ',' + || COALESCE(collectionHQ.quote(lms_item_type), '') || ',' + || COALESCE(collectionHQ.quote(class_num), '') || ',' + || COALESCE(collectionHQ.quote(extract_date), ''); + + RAISE INFO '%', output; + + num_rows := num_rows + 1; + IF (num_rows::numeric % 10000.0 = 0.0) THEN RAISE INFO '% rows written', num_rows; END IF; + + END LOOP; + + RAISE INFO '% rows written in total.', num_rows; + + END; + +$$ LANGUAGE plpgsql; diff --git a/collectionHQ/get_bibs.sql b/collectionHQ/get_bibs.sql new file mode 100644 index 0000000..fbae37d --- /dev/null +++ b/collectionHQ/get_bibs.sql @@ -0,0 +1 @@ +SELECT collectionHQ.write_bib_rows_to_stdout('LIBRARYCODE'); diff --git a/collectionHQ/get_items.sql b/collectionHQ/get_items.sql new file mode 100644 index 0000000..0836cf6 --- /dev/null +++ b/collectionHQ/get_items.sql @@ -0,0 +1 @@ +SELECT collectionHQ.write_item_rows_to_stdout('LIBRARYCODE'); diff --git a/collectionHQ/send-email.pl b/collectionHQ/send-email.pl new file mode 100755 index 0000000..2754544 --- /dev/null +++ b/collectionHQ/send-email.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl -w + +use strict; +use Email::Sender::Simple qw(sendmail); +use Email::Simple; +use Email::Simple::Creator; +use Email::Sender::Transport::SMTP qw(); +use Getopt::Long; + +my $to = 'esi@localhost.localdomain'; +my $from = $to; +my $subject = ""; +my $body = ""; + +my $result = GetOptions ( + "to=s" => \$to, + "from=s" => \$from, + "subject=s" => \$subject +); + +my $transport = Email::Sender::Transport::SMTP->new ({ + host => 'smtp.example.org', + port => 25 +}); + +while (<>) { $body .= $_; } + +my $email = Email::Simple->create( + header => [ + To => $to, + From => $from, + Subject => $subject, + ], + body => $body, +); + +sendmail($email, { transport => $transport }); + +=head1 NAME + +send-email.pl + +=head1 USAGE + +echo Hello world! | \ + + send-email.pl \ + + --from '"Cron" ' \ + + --to '"User" , "Someone Else" ' \ + + --subject "Stuff"