SELECT vandelay.extract_rec_attrs( $1, (SELECT ARRAY_ACCUM(name) FROM config.record_attr_definition));
$_$ LANGUAGE SQL;
-CREATE OR REPLACE FUNCTION vandelay.match_bib_record ( ) RETURNS TRIGGER AS $func$
+-- Everything between this comment and the beginning of the definition of
+-- vandelay.match_bib_record() is strictly in service of that function.
+CREATE TYPE vandelay.match_set_test_result AS (record BIGINT, quality INTEGER);
+
+CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
+ match_set_id INTEGER, record_xml TEXT
+) RETURNS SETOF vandelay.match_set_test_result AS $$
+DECLARE
+ tags_rstore HSTORE;
+ svf_rstore HSTORE;
+ coal TEXT;
+ joins TEXT;
+ query_ TEXT;
+ wq TEXT;
+ qvalue INTEGER;
+ rec RECORD;
+BEGIN
+ tags_rstore := vandelay.flatten_marc_hstore(record_xml);
+ svf_rstore := vandelay.extract_rec_attrs(record_xml);
+
+ CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
+ CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
+
+ -- generate the where clause and return that directly (into wq), and as
+ -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
+ wq := vandelay.get_expr_from_match_set(match_set_id);
+
+ query_ := 'SELECT bre.id AS record, ';
+
+ -- qrows table is for the quality bits we add to the SELECT clause
+ SELECT ARRAY_TO_STRING(
+ ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + '
+ ) INTO coal FROM _vandelay_tmp_qrows;
+
+ -- our query string so far is the SELECT clause and the inital FROM.
+ -- no JOINs yet nor the WHERE clause
+ query_ := query_ || coal || ' AS quality ' || E'\n' ||
+ 'FROM biblio.record_entry bre ';
+
+ -- jrows table is for the joins we must make (and the real text conditions)
+ SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
+ FROM _vandelay_tmp_jrows;
+
+ -- add those joins and the where clause to our query.
+ query_ := query_ || joins || E'\n' || 'WHERE ' || wq;
+
+ -- this will return rows of record,quality
+ FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
+ RETURN NEXT rec;
+ END LOOP;
+
+ DROP TABLE _vandelay_tmp_qrows;
+ DROP TABLE _vandelay_tmp_jrows;
+ RETURN;
+END;
+
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.flatten_marc_hstore(
+ record_xml TEXT
+) RETURNS HSTORE AS $$
+BEGIN
+ RETURN (SELECT
+ HSTORE(
+ ARRAY_ACCUM(tag || (COALESCE(subfield, ''))),
+ ARRAY_ACCUM(value)
+ )
+ FROM (
+ SELECT tag, subfield, ARRAY_ACCUM(value)::TEXT AS value
+ FROM vandelay.flatten_marc(record_xml)
+ GROUP BY tag, subfield ORDER BY tag, subfield
+ ) subquery
+ );
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
+ match_set_id INTEGER
+) RETURNS TEXT AS $$
+DECLARE
+ root vandelay.match_set_point;
+BEGIN
+ SELECT * INTO root FROM vandelay.match_set_point
+ WHERE parent IS NULL AND match_set = match_set_id;
+
+ RETURN vandelay.get_expr_from_match_set_point(root);
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
+ node vandelay.match_set_point
+) RETURNS TEXT AS $$
+DECLARE
+ q TEXT;
+ i INTEGER;
+ this_op TEXT;
+ children INTEGER[];
+ child vandelay.match_set_point;
+BEGIN
+ SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point
+ WHERE parent = node.id;
+
+ IF ARRAY_LENGTH(children, 1) > 0 THEN
+ this_op := vandelay._get_expr_render_one(node);
+ q := '(';
+ i := 1;
+ WHILE children[i] IS NOT NULL LOOP
+ SELECT * INTO child FROM vandelay.match_set_point
+ WHERE id = children[i];
+ IF i > 1 THEN
+ q := q || ' ' || this_op || ' ';
+ END IF;
+ i := i + 1;
+ q := q || vandelay.get_expr_from_match_set_point(child);
+ END LOOP;
+ q := q || ')';
+ RETURN q;
+ ELSIF node.bool_op IS NULL THEN
+ PERFORM vandelay._get_expr_push_qrow(node);
+ PERFORM vandelay._get_expr_push_jrow(node);
+ RETURN vandelay._get_expr_render_one(node);
+ ELSE
+ RETURN '';
+ END IF;
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay._get_expr_push_qrow(
+ node vandelay.match_set_point
+) RETURNS VOID AS $$
+DECLARE
+BEGIN
+ INSERT INTO _vandelay_tmp_qrows (q) VALUES (node.id);
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
+ node vandelay.match_set_point
+) RETURNS VOID AS $$
+DECLARE
+ jrow TEXT;
+ my_alias TEXT;
+ op TEXT;
+ tagkey TEXT;
+BEGIN
+ IF node.negate THEN
+ op := '<>';
+ ELSE
+ op := '=';
+ END IF;
+
+ IF node.tag IS NOT NULL THEN
+ tagkey := node.tag;
+ IF node.subfield IS NOT NULL THEN
+ tagkey := tagkey || node.subfield;
+ END IF;
+ END IF;
+
+ my_alias := 'n' || node.id::TEXT;
+
+ jrow := 'LEFT JOIN (SELECT *, ' || node.quality ||
+ ' AS quality FROM metabib.';
+ IF node.tag IS NOT NULL THEN
+ jrow := jrow || 'full_rec) ' || my_alias || ' ON (' ||
+ my_alias || '.record = bre.id AND ' || my_alias || '.tag = ''' ||
+ node.tag || '''';
+ IF node.subfield IS NOT NULL THEN
+ jrow := jrow || ' AND ' || my_alias || '.subfield = ''' ||
+ node.subfield || '''';
+ END IF;
+ jrow := jrow || ' AND (' || my_alias || '.value ' || op ||
+ ' ANY(($1->''' || tagkey || ''')::TEXT[])))';
+ ELSE -- svf
+ jrow := jrow || 'record_attr) ' || my_alias || ' ON (' ||
+ my_alias || '.id = bre.id AND (' ||
+ my_alias || '.attrs->''' || node.svf ||
+ ''' ' || op || ' $2->''' || node.svf || '''))';
+ END IF;
+ INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay._get_expr_render_one(
+ node vandelay.match_set_point
+) RETURNS TEXT AS $$
+DECLARE
+ s TEXT;
+BEGIN
+ IF node.bool_op IS NOT NULL THEN
+ RETURN node.bool_op;
+ ELSE
+ RETURN '(n' || node.id::TEXT || '.id IS ' ||
+ (CASE WHEN node.negate THEN '' ELSE 'NOT ' END) || -- sic!
+ 'NULL)';
+ END IF;
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.match_bib_record() RETURNS TRIGGER AS $func$
DECLARE
incoming_existing_id TEXT;
my_bib_queue vandelay.bib_queue%ROWTYPE;
- my_match_set vandelay.match_set%ROWTYPE;
- test vandelay.match_set_point%ROWTYPE;
- potential_matches BIGINT[];
- matches BIGINT[];
- rvalue TEXT;
- quality_set hstore;
+ test_result vandelay.match_set_test_result%ROWTYPE;
tmp_rec BIGINT;
- tmp_quality INT;
- first_round BOOL;
BEGIN
DELETE FROM vandelay.bib_match WHERE queued_record = NEW.id;
- incoming_existing_id := oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]',NEW.marc);
+ -- Perfect matches on 901$c exit early with a match with high quality.
+ incoming_existing_id :=
+ oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc);
IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN
SELECT id INTO tmp_rec FROM biblio.record_entry WHERE id = incoming_existing_id::bigint;
SELECT * INTO my_bib_queue FROM vandelay.bib_queue WHERE id = NEW.queue;
- first_round := TRUE;
- -- whew ... here we go ...
-
-
- -- Commented out until replaced by tree-ish version
-/*
- FOR test IN SELECT * FROM vandelay.match_set_point WHERE match_set = my_bib_queue.match_set ORDER BY required DESC LOOP
- IF test.tag IS NOT NULL THEN
- FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml ) WHERE tag = test.tag AND subfield = test.subfield LOOP
- SELECT ARRAY_ACCUM(DISTINCT record) INTO potential_matches FROM metabib.real_full_rec WHERE tag = test.tag AND subfield = test.subfield AND value = rvalue;
-
- IF first_round THEN
- matches := potential_matches;
- first_round := FALSE;
- ELSIF test.required THEN
- FOR tmp_rec IN SELECT * FROM UNNEST(matches) LOOP
- IF tmp_rec NOT IN (SELECT * FROM UNNEST(potential_matches)) THEN
- matches := evergreen.array_remove_item_by_value(matches, tmp_rec);
- potential_matches := evergreen.array_remove_item_by_value(potential_matches, tmp_rec);
- END IF;
- END LOOP;
- END IF;
+ FOR test_result IN SELECT * FROM
+ vandelay.match_set_test_marcxml(my_bib_queue.match_set, NEW.marc) LOOP
- -- add the quality for this match
- FOR tmp_rec IN SELECT * FROM UNNEST(potential_matches) LOOP
- tmp_quality := COALESCE((quality_set -> tmp_rec::TEXT)::INT, 0);
- quality_set := quality_set || hstore(tmp_rec::TEXT, (tmp_quality + test.quality)::TEXT);
- END LOOP;
-
- END LOOP;
- ELSE
- rvalue := vandelay.vandelay.extract_rec_attrs(xml, ARRAY[test.svf]);
-
- IF first_round THEN
- matches := potential_matches;
- first_round := FALSE;
- ELSIF test.required THEN
- FOR tmp_rec IN SELECT * FROM UNNEST(matches) LOOP
- IF tmp_rec NOT IN (SELECT * FROM UNNEST(potential_matches)) THEN
- matches := evergreen.array_remove_item_by_value(matches, tmp_rec);
- potential_matches := evergreen.array_remove_item_by_value(potential_matches, tmp_rec);
- END IF;
- END LOOP;
- END IF;
-
- -- add the quality for this match
- FOR tmp_rec IN SELECT * FROM UNNEST(potential_matches) LOOP
- tmp_quality := COALESCE((quality_set -> tmp_rec::TEXT)::INT, 0);
- quality_set := quality_set || hstore(tmp_rec::TEXT, (tmp_quality + test.quality)::TEXT);
- END LOOP;
-
- END IF;
- END LOOP;
+ INSERT INTO vandelay.bib_match (
+ matched_set, queued_record, eg_record, quality
+ ) VALUES (
+ my_bib_queue.match_set, NEW.id, test_result.record,
+ test_result.quality
+ );
- FOR tmp_rec IN SELECT * FROM UNNEST(matches) LOOP
- INSERT INTO vandelay.bib_match (matched_set, queued_record, eg_record, quality) VALUES (my_bib_queue.match_set, NEW.id, tmp_rec, (quality_set -> tmp_rec::TEXT));
END LOOP;
-*/
RETURN NEW;
END;