From: dbs Date: Fri, 18 Mar 2011 13:36:18 +0000 (+0000) Subject: Add an extension for tracking bib updates of interest to external search engine X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=c67ec7184f86babe079684455da11e282ef8ef2a;p=contrib%2FConifer.git Add an extension for tracking bib updates of interest to external search engine In addition to adding basic triggers on asset.call_number and biblio.record_entry, we define a 'solr' role. Further work needs to be done to lock down the privileges on that role. git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/branches/rel_1_6_1@1264 6d9bc8c9-1ec2-4278-b937-99fde70a366f --- diff --git a/src/sql/Pg/solr.sql b/src/sql/Pg/solr.sql new file mode 100644 index 0000000000..921cbed809 --- /dev/null +++ b/src/sql/Pg/solr.sql @@ -0,0 +1,124 @@ +-- The goal of this extension is to create a simple table containing +-- a list of MARC records by library that have been inserted, updated, +-- or deleted since a given time. In theory, you should be able to point +-- Solr's DataImportHandler at it to suck in the pertinent data. +-- +-- To make things a bit simpler, the solr.bib_updates_by_lib() function +-- returns the bib record ID, the bib record MARCXML, and the change +-- timestamp for a given set of libraries since a given time. To call it, +-- pass a timestamp and an array of library IDs; for example: +-- +-- SELECT solr.bib_updates_by_lib('2011-03-18 09:19:00 -5:00', '{4,103}'); +-- +-- WARNING: This is far from perfect! Some known gotchas include: +-- +-- 1. solr.notify_solr_bib() does not take transparency into account; we +-- don't use it, but others adopting this extension might as well be +-- aware. +-- 2. The additional overhead introduced by the triggers on bre and acn +-- have not been tested to determine how much performance impact they +-- introduce. +-- 3. There may be false "DELETE" incidents retrieved when the last callnumber +-- for library A is deleted, but library B still has a non-deleted +-- callnumber. + +DROP SCHEMA solr CASCADE; + +BEGIN; +CREATE SCHEMA solr; + +CREATE TABLE solr.bib_updates ( + record BIGINT, + update_type TEXT, + owning_lib INT, + touched TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +CREATE INDEX solr_bib_update_type ON solr.bib_updates(touched, owning_lib, update_type); + +CREATE FUNCTION solr.notify_solr_bib() RETURNS trigger AS $notify_solr$ +DECLARE touch_type TEXT; +BEGIN + -- If the MARC record isn't updated, we don't care + IF (TG_OP = 'UPDATE' AND NEW.marc = OLD.marc) THEN + RETURN NULL; + ELSIF (TG_OP = 'DELETE' OR TG_OP = 'TRUNCATE' OR (NEW.deleted IS TRUE AND OLD.deleted IS FALSE)) IS TRUE THEN + touch_type = 'DELETE'; + INSERT INTO solr.bib_updates (record, update_type, owning_lib) + SELECT DISTINCT NEW.id, touch_type, acn.owning_lib + FROM asset.call_number acn + WHERE acn.record = NEW.id; + ELSE + touch_type = 'UPDATE'; + INSERT INTO solr.bib_updates (record, update_type, owning_lib) + SELECT DISTINCT NEW.id, touch_type, acn.owning_lib + FROM asset.call_number acn + WHERE acn.record = NEW.id AND acn.deleted IS FALSE; + END IF; + + RETURN NULL; +END; +$notify_solr$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION solr.notify_solr_bib() IS $about$ +Adds an entry to the solr.bib_updates table when a bib record is +updated (if the MARC has changed) or deleted (generally when all +call numbers attached to the bib record have been deleted). +$about$; + +DROP TRIGGER IF EXISTS notify_solr ON biblio.record_entry; + +CREATE TRIGGER notify_solr + AFTER UPDATE OR DELETE ON biblio.record_entry + FOR EACH ROW EXECUTE PROCEDURE solr.notify_solr_bib(); + +CREATE FUNCTION solr.notify_solr_call_number() RETURNS trigger AS $notify_solr$ +DECLARE callnum_cnt INT; +BEGIN + + IF (TG_OP = 'INSERT') THEN + -- Update the bib_updates table when a call number is added for + -- a given library + INSERT INTO solr.bib_updates (record, update_type, owning_lib) + VALUES (NEW.record, 'INSERT', NEW.owning_lib); + + ELSIF (TG_OP = 'TRUNCATE' OR TG_OP = 'DELETE' OR (NEW.deleted IS TRUE AND OLD.deleted IS FALSE)) THEN + + SELECT COUNT(*) INTO callnum_cnt + FROM asset.call_number acn + WHERE acn.record = NEW.record + AND acn.owning_lib = NEW.owning_lib + AND acn.deleted IS FALSE + AND acn.id <> NEW.id; + + IF (callnum_cnt = 0) THEN + INSERT INTO solr.bib_updates (record, update_type, owning_lib) + VALUES (NEW.record, 'DELETE', NEW.owning_lib); + END IF; + END IF; + + RETURN NULL; +END; +$notify_solr$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS notify_solr_call_number ON asset.call_number; + +CREATE TRIGGER notify_solr_call_number + AFTER INSERT OR DELETE OR UPDATE ON asset.call_number + FOR EACH ROW EXECUTE PROCEDURE solr.notify_solr_call_number(); + +COMMIT; + +CREATE ROLE solr; +GRANT USAGE ON SCHEMA biblio TO solr; +GRANT USAGE ON SCHEMA solr TO solr; +GRANT SELECT ON TABLE solr.bib_updates TO solr; +GRANT SELECT ON TABLE biblio.record_entry TO solr; + +CREATE OR REPLACE FUNCTION solr.bib_updates_by_lib (since TIMESTAMP, libs INT[]) + RETURNS TABLE (record BIGINT, touch_type TEXT, touched TIMESTAMP) AS +$bibs_by_lib$ + SELECT DISTINCT bre.id, bre.marc, sbu.touched + FROM biblio.record_entry bre + INNER JOIN solr.bib_updates sbu ON sbu.record = bre.id + WHERE sbu.touched > $1 AND sbu.owning_lib = ANY ($2); +$bibs_by_lib$ LANGUAGE SQL;