BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0737', :eg_version); -- dyrcona/dbs
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0738', :eg_version); -- senator/dbwells
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
--- /dev/null
+BEGIN;
+
+-- 0738.schema.vandelay.import-match-no-like-any.sql
+
+SELECT evergreen.upgrade_deps_block_check('0738', :eg_version);
+
+CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
+ match_set_id INTEGER, record_xml TEXT
+) RETURNS SETOF vandelay.match_set_test_result AS $$
+DECLARE
+ tags_rstore HSTORE;
+ svf_rstore HSTORE;
+ coal TEXT;
+ joins TEXT;
+ query_ TEXT;
+ wq TEXT;
+ qvalue INTEGER;
+ rec RECORD;
+BEGIN
+ tags_rstore := vandelay.flatten_marc_hstore(record_xml);
+ svf_rstore := vandelay.extract_rec_attrs(record_xml);
+
+ CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
+ CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
+
+ -- generate the where clause and return that directly (into wq), and as
+ -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
+ wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
+
+ query_ := 'SELECT DISTINCT(record), ';
+
+ -- qrows table is for the quality bits we add to the SELECT clause
+ SELECT ARRAY_TO_STRING(
+ ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + '
+ ) INTO coal FROM _vandelay_tmp_qrows;
+
+ -- our query string so far is the SELECT clause and the inital FROM.
+ -- no JOINs yet nor the WHERE clause
+ query_ := query_ || coal || ' AS quality ' || E'\n';
+
+ -- jrows table is for the joins we must make (and the real text conditions)
+ SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
+ FROM _vandelay_tmp_jrows;
+
+ -- add those joins and the where clause to our query.
+ query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
+
+ -- this will return rows of record,quality
+ FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
+ RETURN NEXT rec;
+ END LOOP;
+
+ DROP TABLE _vandelay_tmp_qrows;
+ DROP TABLE _vandelay_tmp_jrows;
+ RETURN;
+END;
+
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
+ match_set_id INTEGER,
+ tags_rstore HSTORE
+) RETURNS TEXT AS $$
+DECLARE
+ root vandelay.match_set_point;
+BEGIN
+ SELECT * INTO root FROM vandelay.match_set_point
+ WHERE parent IS NULL AND match_set = match_set_id;
+
+ RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
+ node vandelay.match_set_point,
+ tags_rstore HSTORE
+) RETURNS TEXT AS $$
+DECLARE
+ q TEXT;
+ i INTEGER;
+ this_op TEXT;
+ children INTEGER[];
+ child vandelay.match_set_point;
+BEGIN
+ SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point
+ WHERE parent = node.id;
+
+ IF ARRAY_LENGTH(children, 1) > 0 THEN
+ this_op := vandelay._get_expr_render_one(node);
+ q := '(';
+ i := 1;
+ WHILE children[i] IS NOT NULL LOOP
+ SELECT * INTO child FROM vandelay.match_set_point
+ WHERE id = children[i];
+ IF i > 1 THEN
+ q := q || ' ' || this_op || ' ';
+ END IF;
+ i := i + 1;
+ q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
+ END LOOP;
+ q := q || ')';
+ RETURN q;
+ ELSIF node.bool_op IS NULL THEN
+ PERFORM vandelay._get_expr_push_qrow(node);
+ PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
+ RETURN vandelay._get_expr_render_one(node);
+ ELSE
+ RETURN '';
+ END IF;
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
+ node vandelay.match_set_point,
+ tags_rstore HSTORE
+) RETURNS VOID AS $$
+DECLARE
+ jrow TEXT;
+ my_alias TEXT;
+ op TEXT;
+ tagkey TEXT;
+ caseless BOOL;
+ jrow_count INT;
+ my_using TEXT;
+ my_join TEXT;
+BEGIN
+ -- remember $1 is tags_rstore, and $2 is svf_rstore
+
+ caseless := FALSE;
+ SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
+ IF jrow_count > 0 THEN
+ my_using := ' USING (record)';
+ my_join := 'FULL OUTER JOIN';
+ ELSE
+ my_using := '';
+ my_join := 'FROM';
+ END IF;
+
+ IF node.tag IS NOT NULL THEN
+ caseless := (node.tag IN ('020', '022', '024'));
+ tagkey := node.tag;
+ IF node.subfield IS NOT NULL THEN
+ tagkey := tagkey || node.subfield;
+ END IF;
+ END IF;
+
+ IF node.negate THEN
+ IF caseless THEN
+ op := 'NOT LIKE';
+ ELSE
+ op := '<>';
+ END IF;
+ ELSE
+ IF caseless THEN
+ op := 'LIKE';
+ ELSE
+ op := '=';
+ END IF;
+ END IF;
+
+ my_alias := 'n' || node.id::TEXT;
+
+ jrow := my_join || ' (SELECT *, ';
+ IF node.tag IS NOT NULL THEN
+ jrow := jrow || node.quality ||
+ ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
+ node.tag || '''';
+ IF node.subfield IS NOT NULL THEN
+ jrow := jrow || ' AND mfr.subfield = ''' ||
+ node.subfield || '''';
+ END IF;
+ jrow := jrow || ' AND (';
+ jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
+ jrow := jrow || ')) ' || my_alias || my_using || E'\n';
+ ELSE -- svf
+ jrow := jrow || 'id AS record, ' || node.quality ||
+ ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
+ node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
+ my_alias || my_using || E'\n';
+ END IF;
+ INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
+ caseless BOOLEAN,
+ op TEXT,
+ tags_rstore HSTORE,
+ tagkey TEXT
+) RETURNS TEXT AS $$
+DECLARE
+ result TEXT;
+ i INT;
+ vals TEXT[];
+BEGIN
+ i := 1;
+ vals := tags_rstore->tagkey;
+ result := '';
+
+ WHILE TRUE LOOP
+ IF i > 1 THEN
+ IF vals[i] IS NULL THEN
+ EXIT;
+ ELSE
+ result := result || ' OR ';
+ END IF;
+ END IF;
+
+ IF caseless THEN
+ result := result || 'LOWER(mfr.value) ' || op;
+ ELSE
+ result := result || 'mfr.value ' || op;
+ END IF;
+
+ result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
+
+ IF vals[i] IS NULL THEN
+ EXIT;
+ END IF;
+ i := i + 1;
+ END LOOP;
+
+ RETURN result;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- drop old versions of these functions with fewer args
+DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER );
+DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point );
+DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point );
+
+-- This next index might fully supplant an existing one but leaving both for now
+-- (they are not too large)
+-- The reason we need this index is to ensure that the query parser always
+-- prefers this index over the simpler tag/subfield index, as this greatly
+-- increases Vandelay overlay speed for these identifiers, especially when
+-- a record has many of these fields (around > 4-6 seems like the cutoff
+-- on at least one PG9.1 system)
+-- A similar index could be added for other fields (e.g. 010), but one should
+-- leave out the LOWER() in all other cases.
+-- TODO: verify whether we can discard the non tag/subfield/substring version
+-- (metabib_full_rec_isxn_caseless_idx)
+CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
+ ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
+ WHERE tag IN ('020', '022', '024');
+
+COMMIT;
+
+++ /dev/null
-BEGIN;
-
--- XXXX.schema.vandelay.import-match-no-like-any.sql
-
-SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
-
-CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
- match_set_id INTEGER, record_xml TEXT
-) RETURNS SETOF vandelay.match_set_test_result AS $$
-DECLARE
- tags_rstore HSTORE;
- svf_rstore HSTORE;
- coal TEXT;
- joins TEXT;
- query_ TEXT;
- wq TEXT;
- qvalue INTEGER;
- rec RECORD;
-BEGIN
- tags_rstore := vandelay.flatten_marc_hstore(record_xml);
- svf_rstore := vandelay.extract_rec_attrs(record_xml);
-
- CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
- CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
-
- -- generate the where clause and return that directly (into wq), and as
- -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
- wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
-
- query_ := 'SELECT DISTINCT(record), ';
-
- -- qrows table is for the quality bits we add to the SELECT clause
- SELECT ARRAY_TO_STRING(
- ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + '
- ) INTO coal FROM _vandelay_tmp_qrows;
-
- -- our query string so far is the SELECT clause and the inital FROM.
- -- no JOINs yet nor the WHERE clause
- query_ := query_ || coal || ' AS quality ' || E'\n';
-
- -- jrows table is for the joins we must make (and the real text conditions)
- SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
- FROM _vandelay_tmp_jrows;
-
- -- add those joins and the where clause to our query.
- query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
-
- -- this will return rows of record,quality
- FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
- RETURN NEXT rec;
- END LOOP;
-
- DROP TABLE _vandelay_tmp_qrows;
- DROP TABLE _vandelay_tmp_jrows;
- RETURN;
-END;
-
-$$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
- match_set_id INTEGER,
- tags_rstore HSTORE
-) RETURNS TEXT AS $$
-DECLARE
- root vandelay.match_set_point;
-BEGIN
- SELECT * INTO root FROM vandelay.match_set_point
- WHERE parent IS NULL AND match_set = match_set_id;
-
- RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
-END;
-$$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
- node vandelay.match_set_point,
- tags_rstore HSTORE
-) RETURNS TEXT AS $$
-DECLARE
- q TEXT;
- i INTEGER;
- this_op TEXT;
- children INTEGER[];
- child vandelay.match_set_point;
-BEGIN
- SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point
- WHERE parent = node.id;
-
- IF ARRAY_LENGTH(children, 1) > 0 THEN
- this_op := vandelay._get_expr_render_one(node);
- q := '(';
- i := 1;
- WHILE children[i] IS NOT NULL LOOP
- SELECT * INTO child FROM vandelay.match_set_point
- WHERE id = children[i];
- IF i > 1 THEN
- q := q || ' ' || this_op || ' ';
- END IF;
- i := i + 1;
- q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
- END LOOP;
- q := q || ')';
- RETURN q;
- ELSIF node.bool_op IS NULL THEN
- PERFORM vandelay._get_expr_push_qrow(node);
- PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
- RETURN vandelay._get_expr_render_one(node);
- ELSE
- RETURN '';
- END IF;
-END;
-$$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
- node vandelay.match_set_point,
- tags_rstore HSTORE
-) RETURNS VOID AS $$
-DECLARE
- jrow TEXT;
- my_alias TEXT;
- op TEXT;
- tagkey TEXT;
- caseless BOOL;
- jrow_count INT;
- my_using TEXT;
- my_join TEXT;
-BEGIN
- -- remember $1 is tags_rstore, and $2 is svf_rstore
-
- caseless := FALSE;
- SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
- IF jrow_count > 0 THEN
- my_using := ' USING (record)';
- my_join := 'FULL OUTER JOIN';
- ELSE
- my_using := '';
- my_join := 'FROM';
- END IF;
-
- IF node.tag IS NOT NULL THEN
- caseless := (node.tag IN ('020', '022', '024'));
- tagkey := node.tag;
- IF node.subfield IS NOT NULL THEN
- tagkey := tagkey || node.subfield;
- END IF;
- END IF;
-
- IF node.negate THEN
- IF caseless THEN
- op := 'NOT LIKE';
- ELSE
- op := '<>';
- END IF;
- ELSE
- IF caseless THEN
- op := 'LIKE';
- ELSE
- op := '=';
- END IF;
- END IF;
-
- my_alias := 'n' || node.id::TEXT;
-
- jrow := my_join || ' (SELECT *, ';
- IF node.tag IS NOT NULL THEN
- jrow := jrow || node.quality ||
- ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
- node.tag || '''';
- IF node.subfield IS NOT NULL THEN
- jrow := jrow || ' AND mfr.subfield = ''' ||
- node.subfield || '''';
- END IF;
- jrow := jrow || ' AND (';
- jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
- jrow := jrow || ')) ' || my_alias || my_using || E'\n';
- ELSE -- svf
- jrow := jrow || 'id AS record, ' || node.quality ||
- ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
- node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
- my_alias || my_using || E'\n';
- END IF;
- INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
-END;
-$$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
- caseless BOOLEAN,
- op TEXT,
- tags_rstore HSTORE,
- tagkey TEXT
-) RETURNS TEXT AS $$
-DECLARE
- result TEXT;
- i INT;
- vals TEXT[];
-BEGIN
- i := 1;
- vals := tags_rstore->tagkey;
- result := '';
-
- WHILE TRUE LOOP
- IF i > 1 THEN
- IF vals[i] IS NULL THEN
- EXIT;
- ELSE
- result := result || ' OR ';
- END IF;
- END IF;
-
- IF caseless THEN
- result := result || 'LOWER(mfr.value) ' || op;
- ELSE
- result := result || 'mfr.value ' || op;
- END IF;
-
- result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
-
- IF vals[i] IS NULL THEN
- EXIT;
- END IF;
- i := i + 1;
- END LOOP;
-
- RETURN result;
-
-END;
-$$ LANGUAGE PLPGSQL;
-
--- drop old versions of these functions with fewer args
-DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER );
-DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point );
-DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point );
-
--- This next index might fully supplant an existing one but leaving both for now
--- (they are not too large)
--- The reason we need this index is to ensure that the query parser always
--- prefers this index over the simpler tag/subfield index, as this greatly
--- increases Vandelay overlay speed for these identifiers, especially when
--- a record has many of these fields (around > 4-6 seems like the cutoff
--- on at least one PG9.1 system)
--- A similar index could be added for other fields (e.g. 010), but one should
--- leave out the LOWER() in all other cases.
--- TODO: verify whether we can discard the non tag/subfield/substring version
--- (metabib_full_rec_isxn_caseless_idx)
-CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
- ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
- WHERE tag IN ('020', '022', '024');
-
-COMMIT;
-