From 9e1d009950edc1d2b9bea1aa11ff047d7f9c802d Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Mon, 13 Aug 2012 16:10:38 -0400 Subject: [PATCH] Link checker: DB layer fixes Function for ingesting URLs from container item + session New container type for url verification sessions Signed-off-by: Mike Rylander --- Open-ILS/src/sql/Pg/076.functions.url_verify.sql | 43 ++++++++++++++++++++++ Open-ILS/src/sql/Pg/950.data.seed-values.sql | 1 + .../src/sql/Pg/upgrade/XXXX.schema.url_verify.sql | 5 +++ .../sql/Pg/upgrade/YYYY.functions.url_verify.sql | 42 +++++++++++++++++++++ 4 files changed, 91 insertions(+) diff --git a/Open-ILS/src/sql/Pg/076.functions.url_verify.sql b/Open-ILS/src/sql/Pg/076.functions.url_verify.sql index 23dedab0ab..89478ad6f1 100644 --- a/Open-ILS/src/sql/Pg/076.functions.url_verify.sql +++ b/Open-ILS/src/sql/Pg/076.functions.url_verify.sql @@ -60,5 +60,48 @@ CREATE TRIGGER ingest_url_tgr BEFORE INSERT ON url_verify.url FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url(); +CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$ +DECLARE + current_tag TEXT; + current_sf TEXT; + current_url TEXT; + current_ord INT; + current_url_pos INT; + current_selector url_verify.url_selector%ROWTYPE; +BEGIN + current_ord := 1; + + FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP + current_url_pos := 1; + LOOP + SELECT (XPATH(current_selector.xpath || '/text()', b.marc))[current_url_pos]::TEXT INTO current_url + FROM biblio.record_entry b + JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id) + WHERE c.id = item_id; + + EXIT WHEN current_url IS NULL; + + SELECT (XPATH(current_selector.xpath || '/../@tag', b.marc))[current_url_pos]::TEXT INTO current_tag + FROM biblio.record_entry b + JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id) + WHERE c.id = item_id; + + SELECT (XPATH(current_selector.xpath || '/@subfield', b.marc))[current_url_pos]::TEXT INTO current_sf + FROM biblio.record_entry b + JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id) + WHERE c.id = item_id; + + INSERT INTO url_verify.url (item, url_selector, tag, subfield, ord, full_url) + VALUES ( item_id, current_selector.id, current_tag, current_sf, current_ord, current_url); + + current_url_pos := current_url_pos + 1; + current_ord := current_ord + 1; + END LOOP; + END LOOP; + + RETURN current_ord - 1; +END; +$$ LANGUAGE PLPGSQL; + COMMIT; diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index fe4952cff0..d6f0547917 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -4859,6 +4859,7 @@ INSERT INTO container.biblio_record_entry_bucket_type (code,label) VALUES ('staf INSERT INTO container.biblio_record_entry_bucket_type (code,label) VALUES ('bookbag', oils_i18n_gettext('bookbag', 'Book Bag', 'cbrebt', 'label')); INSERT INTO container.biblio_record_entry_bucket_type (code,label) VALUES ('reading_list', oils_i18n_gettext('reading_list', 'Reading List', 'cbrebt', 'label')); INSERT INTO container.biblio_record_entry_bucket_type (code,label) VALUES ('template_merge',oils_i18n_gettext('template_merge','Template Merge Container', 'cbrebt', 'label')); +INSERT INTO container.biblio_record_entry_bucket_type (code,label) VALUES ('url_verify', oils_i18n_gettext('url_verify', 'URL Verification Queue', 'cbrebt', 'label')); INSERT INTO container.user_bucket_type (code,label) VALUES ('misc', oils_i18n_gettext('misc', 'Miscellaneous', 'cubt', 'label')); INSERT INTO container.user_bucket_type (code,label) VALUES ('folks', oils_i18n_gettext('folks', 'Friends', 'cubt', 'label')); diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql index e001f9f7e4..82da8ee24c 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql @@ -1,3 +1,7 @@ +BEGIN; + +INSERT INTO container.biblio_record_entry_bucket_type (code, label) VALUES ('url_verify', 'URL Verification Queue'); + DROP SCHEMA IF EXISTS url_verify CASCADE; CREATE SCHEMA url_verify; @@ -80,3 +84,4 @@ CREATE TABLE url_verify.filter_set ( CONSTRAINT uvfs_name_once_per_lib UNIQUE (name, owning_lib) ); +COMMIT; diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql index c63c2d9756..2990382a49 100644 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql +++ b/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql @@ -44,6 +44,48 @@ CREATE TRIGGER ingest_url_tgr BEFORE INSERT ON url_verify.url FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url(); +CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$ +DECLARE + current_tag TEXT; + current_sf TEXT; + current_url TEXT; + current_ord INT; + current_url_pos INT; + current_selector url_verify.url_selector%ROWTYPE; +BEGIN + current_ord := 1; + + FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP + current_url_pos := 1; + LOOP + SELECT (XPATH(current_selector.xpath || '/text()', b.marc))[current_url_pos]::TEXT INTO current_url + FROM biblio.record_entry b + JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id) + WHERE c.id = item_id; + + EXIT WHEN current_url IS NULL; + + SELECT (XPATH(current_selector.xpath || '/../@tag', b.marc))[current_url_pos]::TEXT INTO current_tag + FROM biblio.record_entry b + JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id) + WHERE c.id = item_id; + + SELECT (XPATH(current_selector.xpath || '/@subfield', b.marc))[current_url_pos]::TEXT INTO current_sf + FROM biblio.record_entry b + JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id) + WHERE c.id = item_id; + + INSERT INTO url_verify.url (item, url_selector, tag, subfield, ord, full_url) + VALUES ( item_id, current_selector.id, current_tag, current_sf, current_ord, current_url); + + current_url_pos := current_url_pos + 1; + current_ord := current_ord + 1; + END LOOP; + END LOOP; + + RETURN current_ord - 1; +END; +$$ LANGUAGE PLPGSQL; COMMIT; -- 2.11.0