From: Dan Wells Date: Fri, 7 Jun 2013 18:57:48 +0000 (-0400) Subject: Stamping upgrade script for batch z39.50 search, match, overlay X-Git-Tag: sprint4-merge-nov22~3056 X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=d7376fdf678deeb4932388fb78ec4066de16b338;p=working%2FEvergreen.git Stamping upgrade script for batch z39.50 search, match, overlay Signed-off-by: Dan Wells --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index d49e249694..769ad25601 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0794', :eg_version); -- ktomita/bshum +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0796', :eg_version); -- berick/dbwells CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0795.schema.z39-batch-fetch-overlay.sql b/Open-ILS/src/sql/Pg/upgrade/0795.schema.z39-batch-fetch-overlay.sql new file mode 100644 index 0000000000..00d3104dec --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0795.schema.z39-batch-fetch-overlay.sql @@ -0,0 +1,107 @@ +BEGIN; + +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0795', :eg_version); -- berick/dbwells + +CREATE OR REPLACE FUNCTION + evergreen.z3950_attr_name_is_valid(TEXT) RETURNS BOOLEAN AS $func$ + SELECT EXISTS (SELECT 1 FROM config.z3950_attr WHERE name = $1); +$func$ LANGUAGE SQL STRICT IMMUTABLE; + +COMMENT ON FUNCTION evergreen.z3950_attr_name_is_valid(TEXT) IS $$ +Results in TRUE if there exists at least one config.z3950_attr +with the provided name. Used by config.z3950_index_field_map +to verify z3950_attr_type maps. +$$; + +CREATE TABLE config.z3950_index_field_map ( + id SERIAL PRIMARY KEY, + label TEXT NOT NULL, -- i18n + metabib_field INTEGER REFERENCES config.metabib_field(id), + record_attr TEXT REFERENCES config.record_attr_definition(name), + z3950_attr INTEGER REFERENCES config.z3950_attr(id), + z3950_attr_type TEXT,-- REFERENCES config.z3950_attr(name) + CONSTRAINT metabib_field_or_record_attr CHECK ( + metabib_field IS NOT NULL OR + record_attr IS NOT NULL + ), + CONSTRAINT attr_or_attr_type CHECK ( + z3950_attr IS NOT NULL OR + z3950_attr_type IS NOT NULL + ), + -- ensure the selected z3950_attr_type refers to a valid attr name + CONSTRAINT valid_z3950_attr_type CHECK ( + z3950_attr_type IS NULL OR + evergreen.z3950_attr_name_is_valid(z3950_attr_type) + ) +); + +-- seed data + +INSERT INTO config.z3950_index_field_map + (id, label, metabib_field, z3950_attr_type) VALUES +(1, oils_i18n_gettext(1, 'Title', 'czifm', 'label'), 5, 'title'), +(2, oils_i18n_gettext(2, 'Author', 'czifm', 'label'), 8, 'author'), +(3, oils_i18n_gettext(3, 'ISBN', 'czifm', 'label'), 18, 'isbn'), +(4, oils_i18n_gettext(4, 'ISSN', 'czifm', 'label'), 19, 'issn'), +(5, oils_i18n_gettext(5, 'LCCN', 'czifm', 'label'), 30, 'lccn'); + +INSERT INTO config.z3950_index_field_map + (id, label, record_attr, z3950_attr_type) VALUES +(6, oils_i18n_gettext(6, 'Pubdate', 'czifm', 'label'),'pubdate', 'pubdate'), +(7, oils_i18n_gettext(7, 'Item Type', 'czifm', 'label'),'item_type', 'item_type'); + + +-- let's leave room for more stock mappings +SELECT SETVAL('config.z3950_index_field_map_id_seq'::TEXT, 1000); + +INSERT INTO config.org_unit_setting_type + (name, grp, label, description, datatype) + VALUES ( + 'cat.z3950.batch.max_parallel', + 'cat', + oils_i18n_gettext( + 'cat.z3950.batch.max_parallel', + 'Maximum Parallel Z39.50 Batch Searches', + 'coust', + 'label' + ), + oils_i18n_gettext( + 'cat.z3950.batch.max_parallel', + 'The maximum number of Z39.50 searches that can be in-flight at any given time when performing batch Z39.50 searches', + 'coust', + 'description' + ), + 'integer' + ); + +INSERT INTO config.org_unit_setting_type + (name, grp, label, description, datatype) + VALUES ( + 'cat.z3950.batch.max_results', + 'cat', + oils_i18n_gettext( + 'cat.z3950.batch.max_results', + 'Maximum Z39.50 Batch Search Results', + 'coust', + 'label' + ), + oils_i18n_gettext( + 'cat.z3950.batch.max_results', + 'The maximum number of search results to retrieve and queue for each record + Z39 source during batch Z39.50 searches', + 'coust', + 'description' + ), + 'integer' + ); + +INSERT INTO vandelay.bib_attr_definition (id, code, description, xpath) + VALUES ( + 16, + 'zsource', + oils_i18n_gettext(16, 'Z39.50 Source', 'vqbrad', 'description'), + '//*[@tag="901"]/*[@code="z"]' + ); + + + +COMMIT; diff --git a/Open-ILS/src/sql/Pg/upgrade/0796.schema.vandelay_bucket_match.sql b/Open-ILS/src/sql/Pg/upgrade/0796.schema.vandelay_bucket_match.sql new file mode 100644 index 0000000000..f63e5d25a1 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0796.schema.vandelay_bucket_match.sql @@ -0,0 +1,136 @@ +BEGIN; + +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0796', :eg_version); -- berick/dbwells + +ALTER TABLE vandelay.bib_queue ADD COLUMN match_bucket + INTEGER REFERENCES container.biblio_record_entry_bucket(id) + ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED; + +CREATE OR REPLACE FUNCTION vandelay.match_bib_record() RETURNS TRIGGER AS $func$ +DECLARE + incoming_existing_id TEXT; + test_result vandelay.match_set_test_result%ROWTYPE; + tmp_rec BIGINT; + match_set INT; + match_bucket INT; +BEGIN + IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN + RETURN NEW; + END IF; + + DELETE FROM vandelay.bib_match WHERE queued_record = NEW.id; + + SELECT q.match_set INTO match_set FROM vandelay.bib_queue q WHERE q.id = NEW.queue; + + IF match_set IS NOT NULL THEN + NEW.quality := vandelay.measure_record_quality( NEW.marc, match_set ); + END IF; + + -- Perfect matches on 901$c exit early with a match with high quality. + incoming_existing_id := + oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc); + + IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN + SELECT id INTO tmp_rec FROM biblio.record_entry WHERE id = incoming_existing_id::bigint; + IF tmp_rec IS NOT NULL THEN + INSERT INTO vandelay.bib_match (queued_record, eg_record, match_score, quality) + SELECT + NEW.id, + b.id, + 9999, + -- note: no match_set means quality==0 + vandelay.measure_record_quality( b.marc, match_set ) + FROM biblio.record_entry b + WHERE id = incoming_existing_id::bigint; + END IF; + END IF; + + IF match_set IS NULL THEN + RETURN NEW; + END IF; + + SELECT q.match_bucket INTO match_bucket FROM vandelay.bib_queue q WHERE q.id = NEW.queue; + + FOR test_result IN SELECT * FROM + vandelay.match_set_test_marcxml(match_set, NEW.marc, match_bucket) LOOP + + INSERT INTO vandelay.bib_match ( queued_record, eg_record, match_score, quality ) + SELECT + NEW.id, + test_result.record, + test_result.quality, + vandelay.measure_record_quality( b.marc, match_set ) + FROM biblio.record_entry b + WHERE id = test_result.record; + + END LOOP; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + + +DROP FUNCTION IF EXISTS vandelay.match_set_test_marcxml(INTEGER, TEXT); + +CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml( + match_set_id INTEGER, record_xml TEXT, bucket_id INTEGER +) RETURNS SETOF vandelay.match_set_test_result AS $$ +DECLARE + tags_rstore HSTORE; + svf_rstore HSTORE; + coal TEXT; + joins TEXT; + query_ TEXT; + wq TEXT; + qvalue INTEGER; + rec RECORD; +BEGIN + tags_rstore := vandelay.flatten_marc_hstore(record_xml); + svf_rstore := vandelay.extract_rec_attrs(record_xml); + + CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER); + CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT); + + -- generate the where clause and return that directly (into wq), and as + -- a side-effect, populate the _vandelay_tmp_[qj]rows tables. + wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore); + + query_ := 'SELECT DISTINCT(record), '; + + -- qrows table is for the quality bits we add to the SELECT clause + SELECT ARRAY_TO_STRING( + ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + ' + ) INTO coal FROM _vandelay_tmp_qrows; + + -- our query string so far is the SELECT clause and the inital FROM. + -- no JOINs yet nor the WHERE clause + query_ := query_ || coal || ' AS quality ' || E'\n'; + + -- jrows table is for the joins we must make (and the real text conditions) + SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins + FROM _vandelay_tmp_jrows; + + -- add those joins and the where clause to our query. + query_ := query_ || joins || E'\n'; + + -- join the record bucket + IF bucket_id IS NOT NULL THEN + query_ := query_ || 'JOIN container.biblio_record_entry_bucket_item ' || + 'brebi ON (brebi.target_biblio_record_entry = record ' || + 'AND brebi.bucket = ' || bucket_id || E')\n'; + END IF; + + query_ := query_ || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted'; + + -- this will return rows of record,quality + FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP + RETURN NEXT rec; + END LOOP; + + DROP TABLE _vandelay_tmp_qrows; + DROP TABLE _vandelay_tmp_jrows; + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +COMMIT; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.z39-batch-fetch-overlay.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.z39-batch-fetch-overlay.sql deleted file mode 100644 index 44fff4af4b..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.z39-batch-fetch-overlay.sql +++ /dev/null @@ -1,107 +0,0 @@ -BEGIN; - --- TODO version check - -CREATE OR REPLACE FUNCTION - evergreen.z3950_attr_name_is_valid(TEXT) RETURNS BOOLEAN AS $func$ - SELECT EXISTS (SELECT 1 FROM config.z3950_attr WHERE name = $1); -$func$ LANGUAGE SQL STRICT IMMUTABLE; - -COMMENT ON FUNCTION evergreen.z3950_attr_name_is_valid(TEXT) IS $$ -Results in TRUE if there exists at least one config.z3950_attr -with the provided name. Used by config.z3950_index_field_map -to verify z3950_attr_type maps. -$$; - -CREATE TABLE config.z3950_index_field_map ( - id SERIAL PRIMARY KEY, - label TEXT NOT NULL, -- i18n - metabib_field INTEGER REFERENCES config.metabib_field(id), - record_attr TEXT REFERENCES config.record_attr_definition(name), - z3950_attr INTEGER REFERENCES config.z3950_attr(id), - z3950_attr_type TEXT,-- REFERENCES config.z3950_attr(name) - CONSTRAINT metabib_field_or_record_attr CHECK ( - metabib_field IS NOT NULL OR - record_attr IS NOT NULL - ), - CONSTRAINT attr_or_attr_type CHECK ( - z3950_attr IS NOT NULL OR - z3950_attr_type IS NOT NULL - ), - -- ensure the selected z3950_attr_type refers to a valid attr name - CONSTRAINT valid_z3950_attr_type CHECK ( - z3950_attr_type IS NULL OR - evergreen.z3950_attr_name_is_valid(z3950_attr_type) - ) -); - --- seed data - -INSERT INTO config.z3950_index_field_map - (id, label, metabib_field, z3950_attr_type) VALUES -(1, oils_i18n_gettext(1, 'Title', 'czifm', 'label'), 5, 'title'), -(2, oils_i18n_gettext(2, 'Author', 'czifm', 'label'), 8, 'author'), -(3, oils_i18n_gettext(3, 'ISBN', 'czifm', 'label'), 18, 'isbn'), -(4, oils_i18n_gettext(4, 'ISSN', 'czifm', 'label'), 19, 'issn'), -(5, oils_i18n_gettext(5, 'LCCN', 'czifm', 'label'), 30, 'lccn'); - -INSERT INTO config.z3950_index_field_map - (id, label, record_attr, z3950_attr_type) VALUES -(6, oils_i18n_gettext(6, 'Pubdate', 'czifm', 'label'),'pubdate', 'pubdate'), -(7, oils_i18n_gettext(7, 'Item Type', 'czifm', 'label'),'item_type', 'item_type'); - - --- let's leave room for more stock mappings -SELECT SETVAL('config.z3950_index_field_map_id_seq'::TEXT, 1000); - -INSERT INTO config.org_unit_setting_type - (name, grp, label, description, datatype) - VALUES ( - 'cat.z3950.batch.max_parallel', - 'cat', - oils_i18n_gettext( - 'cat.z3950.batch.max_parallel', - 'Maximum Parallel Z39.50 Batch Searches', - 'coust', - 'label' - ), - oils_i18n_gettext( - 'cat.z3950.batch.max_parallel', - 'The maximum number of Z39.50 searches that can be in-flight at any given time when performing batch Z39.50 searches', - 'coust', - 'description' - ), - 'integer' - ); - -INSERT INTO config.org_unit_setting_type - (name, grp, label, description, datatype) - VALUES ( - 'cat.z3950.batch.max_results', - 'cat', - oils_i18n_gettext( - 'cat.z3950.batch.max_results', - 'Maximum Z39.50 Batch Search Results', - 'coust', - 'label' - ), - oils_i18n_gettext( - 'cat.z3950.batch.max_results', - 'The maximum number of search results to retrieve and queue for each record + Z39 source during batch Z39.50 searches', - 'coust', - 'description' - ), - 'integer' - ); - -INSERT INTO vandelay.bib_attr_definition (id, code, description, xpath) - VALUES ( - 16, - 'zsource', - oils_i18n_gettext(16, 'Z39.50 Source', 'vqbrad', 'description'), - '//*[@tag="901"]/*[@code="z"]' - ); - - - -COMMIT; diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.vandelay_bucket_match.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.vandelay_bucket_match.sql deleted file mode 100644 index ba2f79fb7a..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.vandelay_bucket_match.sql +++ /dev/null @@ -1,136 +0,0 @@ -BEGIN; - --- TODO version check - -ALTER TABLE vandelay.bib_queue ADD COLUMN match_bucket - INTEGER REFERENCES container.biblio_record_entry_bucket(id) - ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED; - -CREATE OR REPLACE FUNCTION vandelay.match_bib_record() RETURNS TRIGGER AS $func$ -DECLARE - incoming_existing_id TEXT; - test_result vandelay.match_set_test_result%ROWTYPE; - tmp_rec BIGINT; - match_set INT; - match_bucket INT; -BEGIN - IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN - RETURN NEW; - END IF; - - DELETE FROM vandelay.bib_match WHERE queued_record = NEW.id; - - SELECT q.match_set INTO match_set FROM vandelay.bib_queue q WHERE q.id = NEW.queue; - - IF match_set IS NOT NULL THEN - NEW.quality := vandelay.measure_record_quality( NEW.marc, match_set ); - END IF; - - -- Perfect matches on 901$c exit early with a match with high quality. - incoming_existing_id := - oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc); - - IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN - SELECT id INTO tmp_rec FROM biblio.record_entry WHERE id = incoming_existing_id::bigint; - IF tmp_rec IS NOT NULL THEN - INSERT INTO vandelay.bib_match (queued_record, eg_record, match_score, quality) - SELECT - NEW.id, - b.id, - 9999, - -- note: no match_set means quality==0 - vandelay.measure_record_quality( b.marc, match_set ) - FROM biblio.record_entry b - WHERE id = incoming_existing_id::bigint; - END IF; - END IF; - - IF match_set IS NULL THEN - RETURN NEW; - END IF; - - SELECT q.match_bucket INTO match_bucket FROM vandelay.bib_queue q WHERE q.id = NEW.queue; - - FOR test_result IN SELECT * FROM - vandelay.match_set_test_marcxml(match_set, NEW.marc, match_bucket) LOOP - - INSERT INTO vandelay.bib_match ( queued_record, eg_record, match_score, quality ) - SELECT - NEW.id, - test_result.record, - test_result.quality, - vandelay.measure_record_quality( b.marc, match_set ) - FROM biblio.record_entry b - WHERE id = test_result.record; - - END LOOP; - - RETURN NEW; -END; -$func$ LANGUAGE PLPGSQL; - - -DROP FUNCTION IF EXISTS vandelay.match_set_test_marcxml(INTEGER, TEXT); - -CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml( - match_set_id INTEGER, record_xml TEXT, bucket_id INTEGER -) RETURNS SETOF vandelay.match_set_test_result AS $$ -DECLARE - tags_rstore HSTORE; - svf_rstore HSTORE; - coal TEXT; - joins TEXT; - query_ TEXT; - wq TEXT; - qvalue INTEGER; - rec RECORD; -BEGIN - tags_rstore := vandelay.flatten_marc_hstore(record_xml); - svf_rstore := vandelay.extract_rec_attrs(record_xml); - - CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER); - CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT); - - -- generate the where clause and return that directly (into wq), and as - -- a side-effect, populate the _vandelay_tmp_[qj]rows tables. - wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore); - - query_ := 'SELECT DISTINCT(record), '; - - -- qrows table is for the quality bits we add to the SELECT clause - SELECT ARRAY_TO_STRING( - ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + ' - ) INTO coal FROM _vandelay_tmp_qrows; - - -- our query string so far is the SELECT clause and the inital FROM. - -- no JOINs yet nor the WHERE clause - query_ := query_ || coal || ' AS quality ' || E'\n'; - - -- jrows table is for the joins we must make (and the real text conditions) - SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins - FROM _vandelay_tmp_jrows; - - -- add those joins and the where clause to our query. - query_ := query_ || joins || E'\n'; - - -- join the record bucket - IF bucket_id IS NOT NULL THEN - query_ := query_ || 'JOIN container.biblio_record_entry_bucket_item ' || - 'brebi ON (brebi.target_biblio_record_entry = record ' || - 'AND brebi.bucket = ' || bucket_id || E')\n'; - END IF; - - query_ := query_ || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted'; - - -- this will return rows of record,quality - FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP - RETURN NEXT rec; - END LOOP; - - DROP TABLE _vandelay_tmp_qrows; - DROP TABLE _vandelay_tmp_jrows; - RETURN; -END; -$$ LANGUAGE PLPGSQL; - -COMMIT;