BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0908', :eg_version); -- ldw/mdriscoll/dyrcona
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0909', :eg_version); -- berick/miker
CREATE TABLE config.bib_source (
id SERIAL PRIMARY KEY,
--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('0909', :eg_version);
+
+ALTER TABLE vandelay.authority_match
+ ADD COLUMN match_score INT NOT NULL DEFAULT 0;
+
+-- support heading=TRUE match set points
+ALTER TABLE vandelay.match_set_point
+ ADD COLUMN heading BOOLEAN NOT NULL DEFAULT FALSE,
+ DROP CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_bo,
+ ADD CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_heading_or_a_bo
+ CHECK (
+ (tag IS NOT NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NULL) OR
+ (tag IS NULL AND svf IS NOT NULL AND heading IS FALSE AND bool_op IS NULL) OR
+ (tag IS NULL AND svf IS NULL AND heading IS TRUE AND bool_op IS NULL) OR
+ (tag IS NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NOT NULL)
+ );
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
+ match_set_id INTEGER,
+ tags_rstore HSTORE,
+ auth_heading TEXT
+) RETURNS TEXT AS $$
+DECLARE
+ root vandelay.match_set_point;
+BEGIN
+ SELECT * INTO root FROM vandelay.match_set_point
+ WHERE parent IS NULL AND match_set = match_set_id;
+
+ RETURN vandelay.get_expr_from_match_set_point(
+ root, tags_rstore, auth_heading);
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- backwards compat version so we don't have
+-- to modify vandelay.match_set_test_marcxml()
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
+ match_set_id INTEGER,
+ tags_rstore HSTORE
+) RETURNS TEXT AS $$
+BEGIN
+ RETURN vandelay.get_expr_from_match_set(
+ match_set_id, tags_rstore, NULL);
+END;
+$$ LANGUAGE PLPGSQL;
+
+
+DROP FUNCTION IF EXISTS
+ vandelay.get_expr_from_match_set_point(vandelay.match_set_point, HSTORE);
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
+ node vandelay.match_set_point,
+ tags_rstore HSTORE,
+ auth_heading TEXT
+) RETURNS TEXT AS $$
+DECLARE
+ q TEXT;
+ i INTEGER;
+ this_op TEXT;
+ children INTEGER[];
+ child vandelay.match_set_point;
+BEGIN
+ SELECT ARRAY_AGG(id) INTO children FROM vandelay.match_set_point
+ WHERE parent = node.id;
+
+ IF ARRAY_LENGTH(children, 1) > 0 THEN
+ this_op := vandelay._get_expr_render_one(node);
+ q := '(';
+ i := 1;
+ WHILE children[i] IS NOT NULL LOOP
+ SELECT * INTO child FROM vandelay.match_set_point
+ WHERE id = children[i];
+ IF i > 1 THEN
+ q := q || ' ' || this_op || ' ';
+ END IF;
+ i := i + 1;
+ q := q || vandelay.get_expr_from_match_set_point(
+ child, tags_rstore, auth_heading);
+ END LOOP;
+ q := q || ')';
+ RETURN q;
+ ELSIF node.bool_op IS NULL THEN
+ PERFORM vandelay._get_expr_push_qrow(node);
+ PERFORM vandelay._get_expr_push_jrow(node, tags_rstore, auth_heading);
+ RETURN vandelay._get_expr_render_one(node);
+ ELSE
+ RETURN '';
+ END IF;
+END;
+$$ LANGUAGE PLPGSQL;
+
+
+DROP FUNCTION IF EXISTS
+ vandelay._get_expr_push_jrow(vandelay.match_set_point, HSTORE);
+
+CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
+ node vandelay.match_set_point,
+ tags_rstore HSTORE,
+ auth_heading TEXT
+) RETURNS VOID AS $$
+DECLARE
+ jrow TEXT;
+ my_alias TEXT;
+ op TEXT;
+ tagkey TEXT;
+ caseless BOOL;
+ jrow_count INT;
+ my_using TEXT;
+ my_join TEXT;
+ rec_table TEXT;
+BEGIN
+ -- remember $1 is tags_rstore, and $2 is svf_rstore
+ -- a non-NULL auth_heading means we're matching authority records
+
+ IF auth_heading IS NOT NULL THEN
+ rec_table := 'authority.full_rec';
+ ELSE
+ rec_table := 'metabib.full_rec';
+ END IF;
+
+ caseless := FALSE;
+ SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
+ IF jrow_count > 0 THEN
+ my_using := ' USING (record)';
+ my_join := 'FULL OUTER JOIN';
+ ELSE
+ my_using := '';
+ my_join := 'FROM';
+ END IF;
+
+ IF node.tag IS NOT NULL THEN
+ caseless := (node.tag IN ('020', '022', '024'));
+ tagkey := node.tag;
+ IF node.subfield IS NOT NULL THEN
+ tagkey := tagkey || node.subfield;
+ END IF;
+ END IF;
+
+ IF node.negate THEN
+ IF caseless THEN
+ op := 'NOT LIKE';
+ ELSE
+ op := '<>';
+ END IF;
+ ELSE
+ IF caseless THEN
+ op := 'LIKE';
+ ELSE
+ op := '=';
+ END IF;
+ END IF;
+
+ my_alias := 'n' || node.id::TEXT;
+
+ jrow := my_join || ' (SELECT *, ';
+ IF node.tag IS NOT NULL THEN
+ jrow := jrow || node.quality ||
+ ' AS quality FROM ' || rec_table || ' mfr WHERE mfr.tag = ''' ||
+ node.tag || '''';
+ IF node.subfield IS NOT NULL THEN
+ jrow := jrow || ' AND mfr.subfield = ''' ||
+ node.subfield || '''';
+ END IF;
+ jrow := jrow || ' AND (';
+ jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
+ jrow := jrow || ')) ' || my_alias || my_using || E'\n';
+ ELSE -- svf
+ IF auth_heading IS NOT NULL THEN -- authority record
+ IF node.heading AND auth_heading <> '' THEN
+ jrow := jrow || 'id AS record, ' || node.quality ||
+ ' AS quality FROM authority.record_entry are ' ||
+ ' WHERE are.heading = ''' || auth_heading || '''';
+ jrow := jrow || ') ' || my_alias || my_using || E'\n';
+ END IF;
+ ELSE -- bib record
+ jrow := jrow || 'id AS record, ' || node.quality ||
+ ' AS quality FROM metabib.record_attr_flat mraf WHERE mraf.attr = ''' ||
+ node.svf || ''' AND mraf.value ' || op || ' $2->''' || node.svf || ''') ' ||
+ my_alias || my_using || E'\n';
+ END IF;
+ END IF;
+ INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
+END;
+$$ LANGUAGE PLPGSQL;
+
+
+CREATE OR REPLACE FUNCTION vandelay.match_set_test_authxml(
+ match_set_id INTEGER, record_xml TEXT
+) RETURNS SETOF vandelay.match_set_test_result AS $$
+DECLARE
+ tags_rstore HSTORE;
+ heading TEXT;
+ coal TEXT;
+ joins TEXT;
+ query_ TEXT;
+ wq TEXT;
+ qvalue INTEGER;
+ rec RECORD;
+BEGIN
+ tags_rstore := vandelay.flatten_marc_hstore(record_xml);
+
+ SELECT normalize_heading INTO heading
+ FROM authority.normalize_heading(record_xml);
+
+ CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
+ CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
+
+ -- generate the where clause and return that directly (into wq), and as
+ -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
+ wq := vandelay.get_expr_from_match_set(
+ match_set_id, tags_rstore, heading);
+
+ query_ := 'SELECT DISTINCT(record), ';
+
+ -- qrows table is for the quality bits we add to the SELECT clause
+ SELECT STRING_AGG(
+ 'COALESCE(n' || q::TEXT || '.quality, 0)', ' + '
+ ) INTO coal FROM _vandelay_tmp_qrows;
+
+ -- our query string so far is the SELECT clause and the inital FROM.
+ -- no JOINs yet nor the WHERE clause
+ query_ := query_ || coal || ' AS quality ' || E'\n';
+
+ -- jrows table is for the joins we must make (and the real text conditions)
+ SELECT STRING_AGG(j, E'\n') INTO joins
+ FROM _vandelay_tmp_jrows;
+
+ -- add those joins and the where clause to our query.
+ query_ := query_ || joins || E'\n';
+
+ query_ := query_ || 'JOIN authority.record_entry are ON (are.id = record) '
+ || 'WHERE ' || wq || ' AND not are.deleted';
+
+ -- this will return rows of record,quality
+ FOR rec IN EXECUTE query_ USING tags_rstore LOOP
+ RETURN NEXT rec;
+ END LOOP;
+
+ DROP TABLE _vandelay_tmp_qrows;
+ DROP TABLE _vandelay_tmp_jrows;
+ RETURN;
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.measure_auth_record_quality
+ ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$
+DECLARE
+ out_q INT := 0;
+ rvalue TEXT;
+ test vandelay.match_set_quality%ROWTYPE;
+BEGIN
+
+ FOR test IN SELECT * FROM vandelay.match_set_quality
+ WHERE match_set = match_set_id LOOP
+ IF test.tag IS NOT NULL THEN
+ FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml )
+ WHERE tag = test.tag AND subfield = test.subfield LOOP
+ IF test.value = rvalue THEN
+ out_q := out_q + test.quality;
+ END IF;
+ END LOOP;
+ END IF;
+ END LOOP;
+
+ RETURN out_q;
+END;
+$_$ LANGUAGE PLPGSQL;
+
+
+
+CREATE OR REPLACE FUNCTION vandelay.match_authority_record() RETURNS TRIGGER AS $func$
+DECLARE
+ incoming_existing_id TEXT;
+ test_result vandelay.match_set_test_result%ROWTYPE;
+ tmp_rec BIGINT;
+ match_set INT;
+BEGIN
+ IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
+ RETURN NEW;
+ END IF;
+
+ DELETE FROM vandelay.authority_match WHERE queued_record = NEW.id;
+
+ SELECT q.match_set INTO match_set FROM vandelay.authority_queue q WHERE q.id = NEW.queue;
+
+ IF match_set IS NOT NULL THEN
+ NEW.quality := vandelay.measure_auth_record_quality( NEW.marc, match_set );
+ END IF;
+
+ -- Perfect matches on 901$c exit early with a match with high quality.
+ incoming_existing_id :=
+ oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc);
+
+ IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN
+ SELECT id INTO tmp_rec FROM authority.record_entry WHERE id = incoming_existing_id::bigint;
+ IF tmp_rec IS NOT NULL THEN
+ INSERT INTO vandelay.authority_match (queued_record, eg_record, match_score, quality)
+ SELECT
+ NEW.id,
+ b.id,
+ 9999,
+ -- note: no match_set means quality==0
+ vandelay.measure_auth_record_quality( b.marc, match_set )
+ FROM authority.record_entry b
+ WHERE id = incoming_existing_id::bigint;
+ END IF;
+ END IF;
+
+ IF match_set IS NULL THEN
+ RETURN NEW;
+ END IF;
+
+ FOR test_result IN SELECT * FROM
+ vandelay.match_set_test_authxml(match_set, NEW.marc) LOOP
+
+ INSERT INTO vandelay.authority_match ( queued_record, eg_record, match_score, quality )
+ SELECT
+ NEW.id,
+ test_result.record,
+ test_result.quality,
+ vandelay.measure_auth_record_quality( b.marc, match_set )
+ FROM authority.record_entry b
+ WHERE id = test_result.record;
+
+ END LOOP;
+
+ RETURN NEW;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE TRIGGER zz_match_auths_trigger
+ BEFORE INSERT OR UPDATE ON vandelay.queued_authority_record
+ FOR EACH ROW EXECUTE PROCEDURE vandelay.match_authority_record();
+
+CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value_p NUMERIC ) RETURNS BOOL AS $$
+DECLARE
+ eg_id BIGINT;
+ lwm_ratio_value NUMERIC;
+BEGIN
+
+ lwm_ratio_value := COALESCE(lwm_ratio_value_p, 0.0);
+
+ PERFORM * FROM vandelay.queued_authority_record WHERE import_time IS NOT NULL AND id = import_id;
+
+ IF FOUND THEN
+ -- RAISE NOTICE 'already imported, cannot auto-overlay'
+ RETURN FALSE;
+ END IF;
+
+ SELECT m.eg_record INTO eg_id
+ FROM vandelay.authority_match m
+ JOIN vandelay.queued_authority_record qr ON (m.queued_record = qr.id)
+ JOIN vandelay.authority_queue q ON (qr.queue = q.id)
+ JOIN authority.record_entry r ON (r.id = m.eg_record)
+ WHERE m.queued_record = import_id
+ AND qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC >= lwm_ratio_value
+ ORDER BY m.match_score DESC, -- required match score
+ qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC DESC, -- quality tie breaker
+ m.id -- when in doubt, use the first match
+ LIMIT 1;
+
+ IF eg_id IS NULL THEN
+ -- RAISE NOTICE 'incoming record is not of high enough quality';
+ RETURN FALSE;
+ END IF;
+
+ RETURN vandelay.overlay_authority_record( import_id, eg_id, merge_profile_id );
+END;
+$$ LANGUAGE PLPGSQL;
+
+
+COMMIT;
+++ /dev/null
-BEGIN;
-
-ALTER TABLE vandelay.authority_match
- ADD COLUMN match_score INT NOT NULL DEFAULT 0;
-
--- support heading=TRUE match set points
-ALTER TABLE vandelay.match_set_point
- ADD COLUMN heading BOOLEAN NOT NULL DEFAULT FALSE,
- DROP CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_bo,
- ADD CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_heading_or_a_bo
- CHECK (
- (tag IS NOT NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NULL) OR
- (tag IS NULL AND svf IS NOT NULL AND heading IS FALSE AND bool_op IS NULL) OR
- (tag IS NULL AND svf IS NULL AND heading IS TRUE AND bool_op IS NULL) OR
- (tag IS NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NOT NULL)
- );
-
-CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
- match_set_id INTEGER,
- tags_rstore HSTORE,
- auth_heading TEXT
-) RETURNS TEXT AS $$
-DECLARE
- root vandelay.match_set_point;
-BEGIN
- SELECT * INTO root FROM vandelay.match_set_point
- WHERE parent IS NULL AND match_set = match_set_id;
-
- RETURN vandelay.get_expr_from_match_set_point(
- root, tags_rstore, auth_heading);
-END;
-$$ LANGUAGE PLPGSQL;
-
--- backwards compat version so we don't have
--- to modify vandelay.match_set_test_marcxml()
-CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
- match_set_id INTEGER,
- tags_rstore HSTORE
-) RETURNS TEXT AS $$
-BEGIN
- RETURN vandelay.get_expr_from_match_set(
- match_set_id, tags_rstore, NULL);
-END;
-$$ LANGUAGE PLPGSQL;
-
-
-DROP FUNCTION IF EXISTS
- vandelay.get_expr_from_match_set_point(vandelay.match_set_point, HSTORE);
-
-CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
- node vandelay.match_set_point,
- tags_rstore HSTORE,
- auth_heading TEXT
-) RETURNS TEXT AS $$
-DECLARE
- q TEXT;
- i INTEGER;
- this_op TEXT;
- children INTEGER[];
- child vandelay.match_set_point;
-BEGIN
- SELECT ARRAY_AGG(id) INTO children FROM vandelay.match_set_point
- WHERE parent = node.id;
-
- IF ARRAY_LENGTH(children, 1) > 0 THEN
- this_op := vandelay._get_expr_render_one(node);
- q := '(';
- i := 1;
- WHILE children[i] IS NOT NULL LOOP
- SELECT * INTO child FROM vandelay.match_set_point
- WHERE id = children[i];
- IF i > 1 THEN
- q := q || ' ' || this_op || ' ';
- END IF;
- i := i + 1;
- q := q || vandelay.get_expr_from_match_set_point(
- child, tags_rstore, auth_heading);
- END LOOP;
- q := q || ')';
- RETURN q;
- ELSIF node.bool_op IS NULL THEN
- PERFORM vandelay._get_expr_push_qrow(node);
- PERFORM vandelay._get_expr_push_jrow(node, tags_rstore, auth_heading);
- RETURN vandelay._get_expr_render_one(node);
- ELSE
- RETURN '';
- END IF;
-END;
-$$ LANGUAGE PLPGSQL;
-
-
-DROP FUNCTION IF EXISTS
- vandelay._get_expr_push_jrow(vandelay.match_set_point, HSTORE);
-
-CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
- node vandelay.match_set_point,
- tags_rstore HSTORE,
- auth_heading TEXT
-) RETURNS VOID AS $$
-DECLARE
- jrow TEXT;
- my_alias TEXT;
- op TEXT;
- tagkey TEXT;
- caseless BOOL;
- jrow_count INT;
- my_using TEXT;
- my_join TEXT;
- rec_table TEXT;
-BEGIN
- -- remember $1 is tags_rstore, and $2 is svf_rstore
- -- a non-NULL auth_heading means we're matching authority records
-
- IF auth_heading IS NOT NULL THEN
- rec_table := 'authority.full_rec';
- ELSE
- rec_table := 'metabib.full_rec';
- END IF;
-
- caseless := FALSE;
- SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
- IF jrow_count > 0 THEN
- my_using := ' USING (record)';
- my_join := 'FULL OUTER JOIN';
- ELSE
- my_using := '';
- my_join := 'FROM';
- END IF;
-
- IF node.tag IS NOT NULL THEN
- caseless := (node.tag IN ('020', '022', '024'));
- tagkey := node.tag;
- IF node.subfield IS NOT NULL THEN
- tagkey := tagkey || node.subfield;
- END IF;
- END IF;
-
- IF node.negate THEN
- IF caseless THEN
- op := 'NOT LIKE';
- ELSE
- op := '<>';
- END IF;
- ELSE
- IF caseless THEN
- op := 'LIKE';
- ELSE
- op := '=';
- END IF;
- END IF;
-
- my_alias := 'n' || node.id::TEXT;
-
- jrow := my_join || ' (SELECT *, ';
- IF node.tag IS NOT NULL THEN
- jrow := jrow || node.quality ||
- ' AS quality FROM ' || rec_table || ' mfr WHERE mfr.tag = ''' ||
- node.tag || '''';
- IF node.subfield IS NOT NULL THEN
- jrow := jrow || ' AND mfr.subfield = ''' ||
- node.subfield || '''';
- END IF;
- jrow := jrow || ' AND (';
- jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
- jrow := jrow || ')) ' || my_alias || my_using || E'\n';
- ELSE -- svf
- IF auth_heading IS NOT NULL THEN -- authority record
- IF node.heading AND auth_heading <> '' THEN
- jrow := jrow || 'id AS record, ' || node.quality ||
- ' AS quality FROM authority.record_entry are ' ||
- ' WHERE are.heading = ''' || auth_heading || '''';
- jrow := jrow || ') ' || my_alias || my_using || E'\n';
- END IF;
- ELSE -- bib record
- jrow := jrow || 'id AS record, ' || node.quality ||
- ' AS quality FROM metabib.record_attr_flat mraf WHERE mraf.attr = ''' ||
- node.svf || ''' AND mraf.value ' || op || ' $2->''' || node.svf || ''') ' ||
- my_alias || my_using || E'\n';
- END IF;
- END IF;
- INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
-END;
-$$ LANGUAGE PLPGSQL;
-
-
-CREATE OR REPLACE FUNCTION vandelay.match_set_test_authxml(
- match_set_id INTEGER, record_xml TEXT
-) RETURNS SETOF vandelay.match_set_test_result AS $$
-DECLARE
- tags_rstore HSTORE;
- heading TEXT;
- coal TEXT;
- joins TEXT;
- query_ TEXT;
- wq TEXT;
- qvalue INTEGER;
- rec RECORD;
-BEGIN
- tags_rstore := vandelay.flatten_marc_hstore(record_xml);
-
- SELECT normalize_heading INTO heading
- FROM authority.normalize_heading(record_xml);
-
- CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
- CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
-
- -- generate the where clause and return that directly (into wq), and as
- -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
- wq := vandelay.get_expr_from_match_set(
- match_set_id, tags_rstore, heading);
-
- query_ := 'SELECT DISTINCT(record), ';
-
- -- qrows table is for the quality bits we add to the SELECT clause
- SELECT STRING_AGG(
- 'COALESCE(n' || q::TEXT || '.quality, 0)', ' + '
- ) INTO coal FROM _vandelay_tmp_qrows;
-
- -- our query string so far is the SELECT clause and the inital FROM.
- -- no JOINs yet nor the WHERE clause
- query_ := query_ || coal || ' AS quality ' || E'\n';
-
- -- jrows table is for the joins we must make (and the real text conditions)
- SELECT STRING_AGG(j, E'\n') INTO joins
- FROM _vandelay_tmp_jrows;
-
- -- add those joins and the where clause to our query.
- query_ := query_ || joins || E'\n';
-
- query_ := query_ || 'JOIN authority.record_entry are ON (are.id = record) '
- || 'WHERE ' || wq || ' AND not are.deleted';
-
- -- this will return rows of record,quality
- FOR rec IN EXECUTE query_ USING tags_rstore LOOP
- RETURN NEXT rec;
- END LOOP;
-
- DROP TABLE _vandelay_tmp_qrows;
- DROP TABLE _vandelay_tmp_jrows;
- RETURN;
-END;
-$$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION vandelay.measure_auth_record_quality
- ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$
-DECLARE
- out_q INT := 0;
- rvalue TEXT;
- test vandelay.match_set_quality%ROWTYPE;
-BEGIN
-
- FOR test IN SELECT * FROM vandelay.match_set_quality
- WHERE match_set = match_set_id LOOP
- IF test.tag IS NOT NULL THEN
- FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml )
- WHERE tag = test.tag AND subfield = test.subfield LOOP
- IF test.value = rvalue THEN
- out_q := out_q + test.quality;
- END IF;
- END LOOP;
- END IF;
- END LOOP;
-
- RETURN out_q;
-END;
-$_$ LANGUAGE PLPGSQL;
-
-
-
-CREATE OR REPLACE FUNCTION vandelay.match_authority_record() RETURNS TRIGGER AS $func$
-DECLARE
- incoming_existing_id TEXT;
- test_result vandelay.match_set_test_result%ROWTYPE;
- tmp_rec BIGINT;
- match_set INT;
-BEGIN
- IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
- RETURN NEW;
- END IF;
-
- DELETE FROM vandelay.authority_match WHERE queued_record = NEW.id;
-
- SELECT q.match_set INTO match_set FROM vandelay.authority_queue q WHERE q.id = NEW.queue;
-
- IF match_set IS NOT NULL THEN
- NEW.quality := vandelay.measure_auth_record_quality( NEW.marc, match_set );
- END IF;
-
- -- Perfect matches on 901$c exit early with a match with high quality.
- incoming_existing_id :=
- oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc);
-
- IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN
- SELECT id INTO tmp_rec FROM authority.record_entry WHERE id = incoming_existing_id::bigint;
- IF tmp_rec IS NOT NULL THEN
- INSERT INTO vandelay.authority_match (queued_record, eg_record, match_score, quality)
- SELECT
- NEW.id,
- b.id,
- 9999,
- -- note: no match_set means quality==0
- vandelay.measure_auth_record_quality( b.marc, match_set )
- FROM authority.record_entry b
- WHERE id = incoming_existing_id::bigint;
- END IF;
- END IF;
-
- IF match_set IS NULL THEN
- RETURN NEW;
- END IF;
-
- FOR test_result IN SELECT * FROM
- vandelay.match_set_test_authxml(match_set, NEW.marc) LOOP
-
- INSERT INTO vandelay.authority_match ( queued_record, eg_record, match_score, quality )
- SELECT
- NEW.id,
- test_result.record,
- test_result.quality,
- vandelay.measure_auth_record_quality( b.marc, match_set )
- FROM authority.record_entry b
- WHERE id = test_result.record;
-
- END LOOP;
-
- RETURN NEW;
-END;
-$func$ LANGUAGE PLPGSQL;
-
-CREATE TRIGGER zz_match_auths_trigger
- BEFORE INSERT OR UPDATE ON vandelay.queued_authority_record
- FOR EACH ROW EXECUTE PROCEDURE vandelay.match_authority_record();
-
-CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value_p NUMERIC ) RETURNS BOOL AS $$
-DECLARE
- eg_id BIGINT;
- lwm_ratio_value NUMERIC;
-BEGIN
-
- lwm_ratio_value := COALESCE(lwm_ratio_value_p, 0.0);
-
- PERFORM * FROM vandelay.queued_authority_record WHERE import_time IS NOT NULL AND id = import_id;
-
- IF FOUND THEN
- -- RAISE NOTICE 'already imported, cannot auto-overlay'
- RETURN FALSE;
- END IF;
-
- SELECT m.eg_record INTO eg_id
- FROM vandelay.authority_match m
- JOIN vandelay.queued_authority_record qr ON (m.queued_record = qr.id)
- JOIN vandelay.authority_queue q ON (qr.queue = q.id)
- JOIN authority.record_entry r ON (r.id = m.eg_record)
- WHERE m.queued_record = import_id
- AND qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC >= lwm_ratio_value
- ORDER BY m.match_score DESC, -- required match score
- qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC DESC, -- quality tie breaker
- m.id -- when in doubt, use the first match
- LIMIT 1;
-
- IF eg_id IS NULL THEN
- -- RAISE NOTICE 'incoming record is not of high enough quality';
- RETURN FALSE;
- END IF;
-
- RETURN vandelay.overlay_authority_record( import_id, eg_id, merge_profile_id );
-END;
-$$ LANGUAGE PLPGSQL;
-
-
-COMMIT;