From: Bill Erickson Date: Fri, 14 Nov 2014 15:16:27 +0000 (-0500) Subject: LP#1171984 Vandelay authority record matching X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=af7304c9d754159f9dac8fe60007e8b89ba639e9;p=evergreen%2Fmasslnc.git LP#1171984 Vandelay authority record matching Adds support for building and using authority match sets in vandelay for import record matching. Match sets include MARC tag/subfield matching and normalized authority heading + thesaurus matching. Commit also contains various small Vandelay UI repairs related to displaying authority queues and matches. Signed-off-by: Bill Erickson Signed-off-by: Mike Rylander --- diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml index 273512f990..cb302466a8 100644 --- a/Open-ILS/examples/fm_IDL.xml +++ b/Open-ILS/examples/fm_IDL.xml @@ -602,6 +602,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + @@ -649,6 +650,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm index efb6cc75fc..588642d011 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm @@ -123,6 +123,7 @@ sub create_auth_queue { $queue->name( $name ); $queue->owner( $owner ); $queue->queue_type( $type ) if ($type); + $queue->match_set($match_set) if $match_set; my $new_q = $e->create_vandelay_authority_queue( $queue ); $e->die_event unless ($new_q); @@ -914,7 +915,7 @@ sub import_record_list_impl { my $overlay_func = 'vandelay.overlay_bib_record'; my $auto_overlay_func = 'vandelay.auto_overlay_bib_record'; - my $auto_overlay_best_func = 'vandelay.auto_overlay_bib_record_with_best'; # XXX bib-only + my $auto_overlay_best_func = 'vandelay.auto_overlay_bib_record_with_best'; my $retrieve_func = 'retrieve_vandelay_queued_bib_record'; my $update_func = 'update_vandelay_queued_bib_record'; my $search_func = 'search_vandelay_queued_bib_record'; @@ -932,6 +933,7 @@ sub import_record_list_impl { if($type eq 'auth') { $overlay_func =~ s/bib/authority/o; $auto_overlay_func =~ s/bib/authority/o; + $auto_overlay_best_func =~ s/bib/authority/o; $retrieve_func =~ s/bib/authority/o; $retrieve_queue_func =~ s/bib/authority/o; $update_queue_func =~ s/bib/authority/o; @@ -2085,7 +2087,8 @@ sub _walk_new_vmsp { my $point = new Fieldmapper::vandelay::match_set_point; $point->parent($parent_id); $point->match_set($match_set_id); - $point->$_($node->$_) for (qw/bool_op svf tag subfield negate quality/); + $point->$_($node->$_) + for (qw/bool_op svf tag subfield negate quality heading/); $e->create_vandelay_match_set_point($point) or return $e->die_event; diff --git a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql index 67aab22b85..b86eda1a9b 100644 --- a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql +++ b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql @@ -23,11 +23,13 @@ CREATE TABLE vandelay.match_set_point ( subfield TEXT, negate BOOL DEFAULT FALSE, quality INT NOT NULL DEFAULT 1, -- higher is better + heading BOOLEAN NOT NULL DEFAULT FALSE, -- match on authority heading CONSTRAINT vmsp_need_a_subfield_with_a_tag CHECK ((tag IS NOT NULL AND subfield IS NOT NULL) OR tag IS NULL), CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_bo CHECK ( - (tag IS NOT NULL AND svf IS NULL AND bool_op IS NULL) OR - (tag IS NULL AND svf IS NOT NULL AND bool_op IS NULL) OR - (tag IS NULL AND svf IS NULL AND bool_op IS NOT NULL) + (tag IS NOT NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NULL) OR + (tag IS NULL AND svf IS NOT NULL AND heading IS FALSE AND bool_op IS NULL) OR + (tag IS NULL AND svf IS NULL AND heading IS TRUE AND bool_op IS NULL) OR + (tag IS NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NOT NULL) ) ); @@ -638,23 +640,38 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; +-- backwards compat version so we don't have +-- to modify vandelay.match_set_test_marcxml() CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set( match_set_id INTEGER, tags_rstore HSTORE ) RETURNS TEXT AS $$ +BEGIN + RETURN vandelay.get_expr_from_match_set( + match_set_id, tags_rstore, NULL); +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set( + match_set_id INTEGER, + tags_rstore HSTORE, + auth_heading TEXT +) RETURNS TEXT AS $$ DECLARE - root vandelay.match_set_point; + root vandelay.match_set_point; BEGIN SELECT * INTO root FROM vandelay.match_set_point WHERE parent IS NULL AND match_set = match_set_id; - RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore); + RETURN vandelay.get_expr_from_match_set_point( + root, tags_rstore, auth_heading); END; $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point( node vandelay.match_set_point, - tags_rstore HSTORE + tags_rstore HSTORE, + auth_heading TEXT ) RETURNS TEXT AS $$ DECLARE q TEXT; @@ -677,13 +694,14 @@ BEGIN q := q || ' ' || this_op || ' '; END IF; i := i + 1; - q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore); + q := q || vandelay.get_expr_from_match_set_point( + child, tags_rstore, auth_heading); END LOOP; q := q || ')'; RETURN q; ELSIF node.bool_op IS NULL THEN PERFORM vandelay._get_expr_push_qrow(node); - PERFORM vandelay._get_expr_push_jrow(node, tags_rstore); + PERFORM vandelay._get_expr_push_jrow(node, tags_rstore, auth_heading); RETURN vandelay._get_expr_render_one(node); ELSE RETURN ''; @@ -702,7 +720,8 @@ $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow( node vandelay.match_set_point, - tags_rstore HSTORE + tags_rstore HSTORE, + auth_heading TEXT ) RETURNS VOID AS $$ DECLARE jrow TEXT; @@ -713,8 +732,16 @@ DECLARE jrow_count INT; my_using TEXT; my_join TEXT; + rec_table TEXT; BEGIN -- remember $1 is tags_rstore, and $2 is svf_rstore + -- a non-NULL auth_heading means we're matching authority records + + IF auth_heading IS NOT NULL THEN + rec_table := 'authority.full_rec'; + ELSE + rec_table := 'metabib.full_rec'; + END IF; caseless := FALSE; SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows; @@ -753,7 +780,7 @@ BEGIN jrow := my_join || ' (SELECT *, '; IF node.tag IS NOT NULL THEN jrow := jrow || node.quality || - ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' || + ' AS quality FROM ' || rec_table || ' mfr WHERE mfr.tag = ''' || node.tag || ''''; IF node.subfield IS NOT NULL THEN jrow := jrow || ' AND mfr.subfield = ''' || @@ -763,10 +790,19 @@ BEGIN jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey); jrow := jrow || ')) ' || my_alias || my_using || E'\n'; ELSE -- svf - jrow := jrow || 'id AS record, ' || node.quality || - ' AS quality FROM metabib.record_attr_flat mraf WHERE mraf.attr = ''' || - node.svf || ''' AND mraf.value ' || op || ' $2->''' || node.svf || ''') ' || - my_alias || my_using || E'\n'; + IF auth_heading IS NOT NULL THEN -- authority record + IF node.heading AND auth_heading <> '' THEN + jrow := jrow || 'id AS record, ' || node.quality || + ' AS quality FROM authority.record_entry are ' || + ' WHERE are.heading = ''' || auth_heading || ''''; + jrow := jrow || ') ' || my_alias || my_using || E'\n'; + END IF; + ELSE -- bib record + jrow := jrow || 'id AS record, ' || node.quality || + ' AS quality FROM metabib.record_attr_flat mraf WHERE mraf.attr = ''' || + node.svf || ''' AND mraf.value ' || op || ' $2->''' || node.svf || ''') ' || + my_alias || my_using || E'\n'; + END IF; END IF; INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow); END; @@ -1684,7 +1720,8 @@ CREATE TABLE vandelay.authority_match ( id BIGSERIAL PRIMARY KEY, queued_record BIGINT REFERENCES vandelay.queued_authority_record (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, eg_record BIGINT REFERENCES authority.record_entry (id) DEFERRABLE INITIALLY DEFERRED, - quality INT NOT NULL DEFAULT 0 + quality INT NOT NULL DEFAULT 0, + match_score INT NOT NULL DEFAULT 0 ); CREATE OR REPLACE FUNCTION vandelay.ingest_authority_marc ( ) RETURNS TRIGGER AS $$ @@ -1855,6 +1892,192 @@ CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_queue ( queue_id BIGI SELECT * FROM vandelay.auto_overlay_authority_queue( $1, NULL ); $$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION vandelay.match_set_test_authxml( + match_set_id INTEGER, record_xml TEXT +) RETURNS SETOF vandelay.match_set_test_result AS $$ +DECLARE + tags_rstore HSTORE; + heading TEXT; + coal TEXT; + joins TEXT; + query_ TEXT; + wq TEXT; + qvalue INTEGER; + rec RECORD; +BEGIN + tags_rstore := vandelay.flatten_marc_hstore(record_xml); + + SELECT normalize_heading INTO heading + FROM authority.normalize_heading(record_xml); + + CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER); + CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT); + + -- generate the where clause and return that directly (into wq), and as + -- a side-effect, populate the _vandelay_tmp_[qj]rows tables. + wq := vandelay.get_expr_from_match_set( + match_set_id, tags_rstore, heading); + + query_ := 'SELECT DISTINCT(record), '; + + -- qrows table is for the quality bits we add to the SELECT clause + SELECT STRING_AGG( + 'COALESCE(n' || q::TEXT || '.quality, 0)', ' + ' + ) INTO coal FROM _vandelay_tmp_qrows; + + -- our query string so far is the SELECT clause and the inital FROM. + -- no JOINs yet nor the WHERE clause + query_ := query_ || coal || ' AS quality ' || E'\n'; + + -- jrows table is for the joins we must make (and the real text conditions) + SELECT STRING_AGG(j, E'\n') INTO joins + FROM _vandelay_tmp_jrows; + + -- add those joins and the where clause to our query. + query_ := query_ || joins || E'\n'; + + query_ := query_ || 'JOIN authority.record_entry are ON (are.id = record) ' + || 'WHERE ' || wq || ' AND not are.deleted'; + + -- this will return rows of record,quality + FOR rec IN EXECUTE query_ USING tags_rstore LOOP + RETURN NEXT rec; + END LOOP; + + DROP TABLE _vandelay_tmp_qrows; + DROP TABLE _vandelay_tmp_jrows; + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.measure_auth_record_quality + ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$ +DECLARE + out_q INT := 0; + rvalue TEXT; + test vandelay.match_set_quality%ROWTYPE; +BEGIN + + FOR test IN SELECT * FROM vandelay.match_set_quality + WHERE match_set = match_set_id LOOP + IF test.tag IS NOT NULL THEN + FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml ) + WHERE tag = test.tag AND subfield = test.subfield LOOP + IF test.value = rvalue THEN + out_q := out_q + test.quality; + END IF; + END LOOP; + END IF; + END LOOP; + + RETURN out_q; +END; +$_$ LANGUAGE PLPGSQL; + + + +CREATE OR REPLACE FUNCTION vandelay.match_authority_record() RETURNS TRIGGER AS $func$ +DECLARE + incoming_existing_id TEXT; + test_result vandelay.match_set_test_result%ROWTYPE; + tmp_rec BIGINT; + match_set INT; +BEGIN + IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN + RETURN NEW; + END IF; + + DELETE FROM vandelay.authority_match WHERE queued_record = NEW.id; + + SELECT q.match_set INTO match_set FROM vandelay.authority_queue q WHERE q.id = NEW.queue; + + IF match_set IS NOT NULL THEN + NEW.quality := vandelay.measure_auth_record_quality( NEW.marc, match_set ); + END IF; + + -- Perfect matches on 901$c exit early with a match with high quality. + incoming_existing_id := + oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc); + + IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN + SELECT id INTO tmp_rec FROM authority.record_entry WHERE id = incoming_existing_id::bigint; + IF tmp_rec IS NOT NULL THEN + INSERT INTO vandelay.authority_match (queued_record, eg_record, match_score, quality) + SELECT + NEW.id, + b.id, + 9999, + -- note: no match_set means quality==0 + vandelay.measure_auth_record_quality( b.marc, match_set ) + FROM authority.record_entry b + WHERE id = incoming_existing_id::bigint; + END IF; + END IF; + + IF match_set IS NULL THEN + RETURN NEW; + END IF; + + FOR test_result IN SELECT * FROM + vandelay.match_set_test_authxml(match_set, NEW.marc) LOOP + + INSERT INTO vandelay.authority_match ( queued_record, eg_record, match_score, quality ) + SELECT + NEW.id, + test_result.record, + test_result.quality, + vandelay.measure_auth_record_quality( b.marc, match_set ) + FROM authority.record_entry b + WHERE id = test_result.record; + + END LOOP; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE TRIGGER zz_match_auths_trigger + BEFORE INSERT OR UPDATE ON vandelay.queued_authority_record + FOR EACH ROW EXECUTE PROCEDURE vandelay.match_authority_record(); + +CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value_p NUMERIC ) RETURNS BOOL AS $$ +DECLARE + eg_id BIGINT; + lwm_ratio_value NUMERIC; +BEGIN + + lwm_ratio_value := COALESCE(lwm_ratio_value_p, 0.0); + + PERFORM * FROM vandelay.queued_authority_record WHERE import_time IS NOT NULL AND id = import_id; + + IF FOUND THEN + -- RAISE NOTICE 'already imported, cannot auto-overlay' + RETURN FALSE; + END IF; + + SELECT m.eg_record INTO eg_id + FROM vandelay.authority_match m + JOIN vandelay.queued_authority_record qr ON (m.queued_record = qr.id) + JOIN vandelay.authority_queue q ON (qr.queue = q.id) + JOIN authority.record_entry r ON (r.id = m.eg_record) + WHERE m.queued_record = import_id + AND qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC >= lwm_ratio_value + ORDER BY m.match_score DESC, -- required match score + qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC DESC, -- quality tie breaker + m.id -- when in doubt, use the first match + LIMIT 1; + + IF eg_id IS NULL THEN + -- RAISE NOTICE 'incoming record is not of high enough quality'; + RETURN FALSE; + END IF; + + RETURN vandelay.overlay_authority_record( import_id, eg_id, merge_profile_id ); +END; +$$ LANGUAGE PLPGSQL; + + + -- Vandelay (for importing and exporting records) 012.schema.vandelay.sql --INSERT INTO vandelay.bib_attr_definition ( id, code, description, xpath ) VALUES (1, 'title', oils_i18n_gettext(1, 'vqbrad', 'Title of work', 'description'),'//*[@tag="245"]/*[contains("abcmnopr",@code)]'); diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority_match_sets.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority_match_sets.sql new file mode 100644 index 0000000000..149ba2525d --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority_match_sets.sql @@ -0,0 +1,371 @@ +BEGIN; + +ALTER TABLE vandelay.authority_match + ADD COLUMN match_score INT NOT NULL DEFAULT 0; + +-- support heading=TRUE match set points +ALTER TABLE vandelay.match_set_point + ADD COLUMN heading BOOLEAN NOT NULL DEFAULT FALSE, + DROP CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_bo, + ADD CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_heading_or_a_bo + CHECK ( + (tag IS NOT NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NULL) OR + (tag IS NULL AND svf IS NOT NULL AND heading IS FALSE AND bool_op IS NULL) OR + (tag IS NULL AND svf IS NULL AND heading IS TRUE AND bool_op IS NULL) OR + (tag IS NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NOT NULL) + ); + +CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set( + match_set_id INTEGER, + tags_rstore HSTORE, + auth_heading TEXT +) RETURNS TEXT AS $$ +DECLARE + root vandelay.match_set_point; +BEGIN + SELECT * INTO root FROM vandelay.match_set_point + WHERE parent IS NULL AND match_set = match_set_id; + + RETURN vandelay.get_expr_from_match_set_point( + root, tags_rstore, auth_heading); +END; +$$ LANGUAGE PLPGSQL; + +-- backwards compat version so we don't have +-- to modify vandelay.match_set_test_marcxml() +CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set( + match_set_id INTEGER, + tags_rstore HSTORE +) RETURNS TEXT AS $$ +BEGIN + RETURN vandelay.get_expr_from_match_set( + match_set_id, tags_rstore, NULL); +END; +$$ LANGUAGE PLPGSQL; + + +DROP FUNCTION IF EXISTS + vandelay.get_expr_from_match_set_point(vandelay.match_set_point, HSTORE); + +CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point( + node vandelay.match_set_point, + tags_rstore HSTORE, + auth_heading TEXT +) RETURNS TEXT AS $$ +DECLARE + q TEXT; + i INTEGER; + this_op TEXT; + children INTEGER[]; + child vandelay.match_set_point; +BEGIN + SELECT ARRAY_AGG(id) INTO children FROM vandelay.match_set_point + WHERE parent = node.id; + + IF ARRAY_LENGTH(children, 1) > 0 THEN + this_op := vandelay._get_expr_render_one(node); + q := '('; + i := 1; + WHILE children[i] IS NOT NULL LOOP + SELECT * INTO child FROM vandelay.match_set_point + WHERE id = children[i]; + IF i > 1 THEN + q := q || ' ' || this_op || ' '; + END IF; + i := i + 1; + q := q || vandelay.get_expr_from_match_set_point( + child, tags_rstore, auth_heading); + END LOOP; + q := q || ')'; + RETURN q; + ELSIF node.bool_op IS NULL THEN + PERFORM vandelay._get_expr_push_qrow(node); + PERFORM vandelay._get_expr_push_jrow(node, tags_rstore, auth_heading); + RETURN vandelay._get_expr_render_one(node); + ELSE + RETURN ''; + END IF; +END; +$$ LANGUAGE PLPGSQL; + + +DROP FUNCTION IF EXISTS + vandelay._get_expr_push_jrow(vandelay.match_set_point, HSTORE); + +CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow( + node vandelay.match_set_point, + tags_rstore HSTORE, + auth_heading TEXT +) RETURNS VOID AS $$ +DECLARE + jrow TEXT; + my_alias TEXT; + op TEXT; + tagkey TEXT; + caseless BOOL; + jrow_count INT; + my_using TEXT; + my_join TEXT; + rec_table TEXT; +BEGIN + -- remember $1 is tags_rstore, and $2 is svf_rstore + -- a non-NULL auth_heading means we're matching authority records + + IF auth_heading IS NOT NULL THEN + rec_table := 'authority.full_rec'; + ELSE + rec_table := 'metabib.full_rec'; + END IF; + + caseless := FALSE; + SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows; + IF jrow_count > 0 THEN + my_using := ' USING (record)'; + my_join := 'FULL OUTER JOIN'; + ELSE + my_using := ''; + my_join := 'FROM'; + END IF; + + IF node.tag IS NOT NULL THEN + caseless := (node.tag IN ('020', '022', '024')); + tagkey := node.tag; + IF node.subfield IS NOT NULL THEN + tagkey := tagkey || node.subfield; + END IF; + END IF; + + IF node.negate THEN + IF caseless THEN + op := 'NOT LIKE'; + ELSE + op := '<>'; + END IF; + ELSE + IF caseless THEN + op := 'LIKE'; + ELSE + op := '='; + END IF; + END IF; + + my_alias := 'n' || node.id::TEXT; + + jrow := my_join || ' (SELECT *, '; + IF node.tag IS NOT NULL THEN + jrow := jrow || node.quality || + ' AS quality FROM ' || rec_table || ' mfr WHERE mfr.tag = ''' || + node.tag || ''''; + IF node.subfield IS NOT NULL THEN + jrow := jrow || ' AND mfr.subfield = ''' || + node.subfield || ''''; + END IF; + jrow := jrow || ' AND ('; + jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey); + jrow := jrow || ')) ' || my_alias || my_using || E'\n'; + ELSE -- svf + IF auth_heading IS NOT NULL THEN -- authority record + IF node.heading AND auth_heading <> '' THEN + jrow := jrow || 'id AS record, ' || node.quality || + ' AS quality FROM authority.record_entry are ' || + ' WHERE are.heading = ''' || auth_heading || ''''; + jrow := jrow || ') ' || my_alias || my_using || E'\n'; + END IF; + ELSE -- bib record + jrow := jrow || 'id AS record, ' || node.quality || + ' AS quality FROM metabib.record_attr_flat mraf WHERE mraf.attr = ''' || + node.svf || ''' AND mraf.value ' || op || ' $2->''' || node.svf || ''') ' || + my_alias || my_using || E'\n'; + END IF; + END IF; + INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow); +END; +$$ LANGUAGE PLPGSQL; + + +CREATE OR REPLACE FUNCTION vandelay.match_set_test_authxml( + match_set_id INTEGER, record_xml TEXT +) RETURNS SETOF vandelay.match_set_test_result AS $$ +DECLARE + tags_rstore HSTORE; + heading TEXT; + coal TEXT; + joins TEXT; + query_ TEXT; + wq TEXT; + qvalue INTEGER; + rec RECORD; +BEGIN + tags_rstore := vandelay.flatten_marc_hstore(record_xml); + + SELECT normalize_heading INTO heading + FROM authority.normalize_heading(record_xml); + + CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER); + CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT); + + -- generate the where clause and return that directly (into wq), and as + -- a side-effect, populate the _vandelay_tmp_[qj]rows tables. + wq := vandelay.get_expr_from_match_set( + match_set_id, tags_rstore, heading); + + query_ := 'SELECT DISTINCT(record), '; + + -- qrows table is for the quality bits we add to the SELECT clause + SELECT STRING_AGG( + 'COALESCE(n' || q::TEXT || '.quality, 0)', ' + ' + ) INTO coal FROM _vandelay_tmp_qrows; + + -- our query string so far is the SELECT clause and the inital FROM. + -- no JOINs yet nor the WHERE clause + query_ := query_ || coal || ' AS quality ' || E'\n'; + + -- jrows table is for the joins we must make (and the real text conditions) + SELECT STRING_AGG(j, E'\n') INTO joins + FROM _vandelay_tmp_jrows; + + -- add those joins and the where clause to our query. + query_ := query_ || joins || E'\n'; + + query_ := query_ || 'JOIN authority.record_entry are ON (are.id = record) ' + || 'WHERE ' || wq || ' AND not are.deleted'; + + -- this will return rows of record,quality + FOR rec IN EXECUTE query_ USING tags_rstore LOOP + RETURN NEXT rec; + END LOOP; + + DROP TABLE _vandelay_tmp_qrows; + DROP TABLE _vandelay_tmp_jrows; + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.measure_auth_record_quality + ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$ +DECLARE + out_q INT := 0; + rvalue TEXT; + test vandelay.match_set_quality%ROWTYPE; +BEGIN + + FOR test IN SELECT * FROM vandelay.match_set_quality + WHERE match_set = match_set_id LOOP + IF test.tag IS NOT NULL THEN + FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml ) + WHERE tag = test.tag AND subfield = test.subfield LOOP + IF test.value = rvalue THEN + out_q := out_q + test.quality; + END IF; + END LOOP; + END IF; + END LOOP; + + RETURN out_q; +END; +$_$ LANGUAGE PLPGSQL; + + + +CREATE OR REPLACE FUNCTION vandelay.match_authority_record() RETURNS TRIGGER AS $func$ +DECLARE + incoming_existing_id TEXT; + test_result vandelay.match_set_test_result%ROWTYPE; + tmp_rec BIGINT; + match_set INT; +BEGIN + IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN + RETURN NEW; + END IF; + + DELETE FROM vandelay.authority_match WHERE queued_record = NEW.id; + + SELECT q.match_set INTO match_set FROM vandelay.authority_queue q WHERE q.id = NEW.queue; + + IF match_set IS NOT NULL THEN + NEW.quality := vandelay.measure_auth_record_quality( NEW.marc, match_set ); + END IF; + + -- Perfect matches on 901$c exit early with a match with high quality. + incoming_existing_id := + oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc); + + IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN + SELECT id INTO tmp_rec FROM authority.record_entry WHERE id = incoming_existing_id::bigint; + IF tmp_rec IS NOT NULL THEN + INSERT INTO vandelay.authority_match (queued_record, eg_record, match_score, quality) + SELECT + NEW.id, + b.id, + 9999, + -- note: no match_set means quality==0 + vandelay.measure_auth_record_quality( b.marc, match_set ) + FROM authority.record_entry b + WHERE id = incoming_existing_id::bigint; + END IF; + END IF; + + IF match_set IS NULL THEN + RETURN NEW; + END IF; + + FOR test_result IN SELECT * FROM + vandelay.match_set_test_authxml(match_set, NEW.marc) LOOP + + INSERT INTO vandelay.authority_match ( queued_record, eg_record, match_score, quality ) + SELECT + NEW.id, + test_result.record, + test_result.quality, + vandelay.measure_auth_record_quality( b.marc, match_set ) + FROM authority.record_entry b + WHERE id = test_result.record; + + END LOOP; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE TRIGGER zz_match_auths_trigger + BEFORE INSERT OR UPDATE ON vandelay.queued_authority_record + FOR EACH ROW EXECUTE PROCEDURE vandelay.match_authority_record(); + +CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value_p NUMERIC ) RETURNS BOOL AS $$ +DECLARE + eg_id BIGINT; + lwm_ratio_value NUMERIC; +BEGIN + + lwm_ratio_value := COALESCE(lwm_ratio_value_p, 0.0); + + PERFORM * FROM vandelay.queued_authority_record WHERE import_time IS NOT NULL AND id = import_id; + + IF FOUND THEN + -- RAISE NOTICE 'already imported, cannot auto-overlay' + RETURN FALSE; + END IF; + + SELECT m.eg_record INTO eg_id + FROM vandelay.authority_match m + JOIN vandelay.queued_authority_record qr ON (m.queued_record = qr.id) + JOIN vandelay.authority_queue q ON (qr.queue = q.id) + JOIN authority.record_entry r ON (r.id = m.eg_record) + WHERE m.queued_record = import_id + AND qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC >= lwm_ratio_value + ORDER BY m.match_score DESC, -- required match score + qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC DESC, -- quality tie breaker + m.id -- when in doubt, use the first match + LIMIT 1; + + IF eg_id IS NULL THEN + -- RAISE NOTICE 'incoming record is not of high enough quality'; + RETURN FALSE; + END IF; + + RETURN vandelay.overlay_authority_record( import_id, eg_id, merge_profile_id ); +END; +$$ LANGUAGE PLPGSQL; + + +COMMIT; diff --git a/Open-ILS/src/templates/conify/global/vandelay/match_set.tt2 b/Open-ILS/src/templates/conify/global/vandelay/match_set.tt2 index da14f89831..819ee9ec3d 100644 --- a/Open-ILS/src/templates/conify/global/vandelay/match_set.tt2 +++ b/Open-ILS/src/templates/conify/global/vandelay/match_set.tt2 @@ -38,7 +38,7 @@ [%# for the origin of these hard coded options, see the definition of vandelay.match_set.mtype in 012.schema.vandelay.sql %] - + diff --git a/Open-ILS/src/templates/conify/global/vandelay/match_set_tree.tt2 b/Open-ILS/src/templates/conify/global/vandelay/match_set_tree.tt2 index 1f463bc17d..a08929e6ff 100644 --- a/Open-ILS/src/templates/conify/global/vandelay/match_set_tree.tt2 +++ b/Open-ILS/src/templates/conify/global/vandelay/match_set_tree.tt2 @@ -84,9 +84,10 @@
- Add new - + [% l('Add new') %] + +
diff --git a/Open-ILS/src/templates/vandelay/inc/matches.tt2 b/Open-ILS/src/templates/vandelay/inc/matches.tt2 index 0fc38172b9..3b00351289 100644 --- a/Open-ILS/src/templates/vandelay/inc/matches.tt2 +++ b/Open-ILS/src/templates/vandelay/inc/matches.tt2 @@ -1,6 +1,6 @@

[% l('Import Matches') %]


diff --git a/Open-ILS/src/templates/vandelay/inc/queue.tt2 b/Open-ILS/src/templates/vandelay/inc/queue.tt2 index 918a04cf85..e3e2035f9f 100644 --- a/Open-ILS/src/templates/vandelay/inc/queue.tt2 +++ b/Open-ILS/src/templates/vandelay/inc/queue.tt2 @@ -31,6 +31,8 @@ [% l('Records in Queue:') %] [% l('Records Imported:') %] [% l('Record Import Failures') %] + + [% l('Items in Queue') %] [% l('Items Imported') %] [% l('Item Import Failures') %] diff --git a/Open-ILS/web/js/dojo/openils/vandelay/nls/match_set.js b/Open-ILS/web/js/dojo/openils/vandelay/nls/match_set.js index be2c79bc30..99a761efd0 100644 --- a/Open-ILS/web/js/dojo/openils/vandelay/nls/match_set.js +++ b/Open-ILS/web/js/dojo/openils/vandelay/nls/match_set.js @@ -12,5 +12,6 @@ "WORKING_MP_HERE": "Choose from among the three buttons above to add a new match point.", "WORKING_QM_HERE": "Use buttons above and to the right to add new quality metrics.", "SVF": "Record Attribute", - "MATCH_SCORE": "Match score ${0}" + "MATCH_SCORE": "Match score ${0}", + "HEADING_MATCH": "Normalized Heading" } diff --git a/Open-ILS/web/js/ui/default/conify/global/vandelay/match_set.js b/Open-ILS/web/js/ui/default/conify/global/vandelay/match_set.js index 1de99e4c69..a00ed0673e 100644 --- a/Open-ILS/web/js/ui/default/conify/global/vandelay/match_set.js +++ b/Open-ILS/web/js/ui/default/conify/global/vandelay/match_set.js @@ -120,6 +120,27 @@ var NodeEditorAbstract = { ); return rows; }, + "heading" : function() { + var tr = dojo.create("tr"); + dojo.create( + "label", { + "for": "heading-input", + "innerHTML": localeStrings.HEADING_MATCH + }, dojo.create("td", null, tr) + ); + + dojo.create( + "input", { + "id": "heading-input", + "type": "checkbox", + "checked": true, + "disabled": true, // if you don't want it, don't use it. + "fmfield": "heading" + }, dojo.create("td", null, tr) + ); + + return [tr]; + }, "bool_op": function() { var tr = dojo.create("tr"); dojo.create( @@ -246,13 +267,21 @@ function QualityNodeEditor() { function NodeEditor() { var self = this; - this.foi = ["tag", "svf", "bool_op"]; /* Fields of Interest - starting points for UI */ + this.foi = ["tag", "svf", "heading", "bool_op"]; /* Fields of Interest - starting points for UI */ this._init = function(dnd_source, node_editor_container) { this._consistent_controls_query = "[consistent-controls], [point-controls]"; this.dnd_source = dnd_source; this.node_editor_container = dojo.byId(node_editor_container); + + // hide match point types which are not relevent to + // the current record type + if (match_set.mtype() == 'authority') { + openils.Util.hide('record-attr-btn'); + } else { + openils.Util.hide('heading-match-btn'); + } }; this.clear = function() { @@ -353,6 +382,10 @@ function render_vmsp_label(point, minimal) { localeStrings.MATCH_SCORE, [point.quality()] ) ); + } else if (point.heading() === true || point.heading() == 't') { + return localeStrings.HEADING_MATCH + + " | " + dojo.string.substitute( + localeStrings.MATCH_SCORE, [point.quality()]); } else { return (openils.Util.isTrue(point.negate()) ? "NOT " : "") + point.tag() + " \u2021" + point.subfield() + (minimal ? "" : " | " + @@ -545,7 +578,8 @@ function my_init() { /* No-one should have hundreds of these or anything, but theoretically * this could be problematic with a big enough list of crad objects. */ - _crads = pcrud.retrieveAll("crad", {"order_by": {"crad": "label"}}); + _crads = match_set.mtype() == 'authority' ? [] : + pcrud.retrieveAll("crad", {"order_by": {"crad": "label"}}); var match_set_tree = fieldmapper.standardRequest( ["open-ils.vandelay", "open-ils.vandelay.match_set.get_tree"], diff --git a/Open-ILS/web/js/ui/default/vandelay/vandelay.js b/Open-ILS/web/js/ui/default/vandelay/vandelay.js index 512c7dcb37..ba6c9bbd54 100644 --- a/Open-ILS/web/js/ui/default/vandelay/vandelay.js +++ b/Open-ILS/web/js/ui/default/vandelay/vandelay.js @@ -626,8 +626,12 @@ function vlLoadMatchUI(recId) { var retrieve = ['open-ils.search', 'open-ils.search.biblio.record_entry.slim.retrieve']; var params = [records]; if(currentType == 'auth') { - retrieve = ['open-ils.cat', 'open-ils.cat.authority.record.retrieve']; - params = [authtoken, records, {clear_marc:1}]; + retrieve = ['open-ils.pcrud', 'open-ils.pcrud.search.are.atomic']; + // retrieve all authority record fields except 'marc' for lightness + var fields = fieldmapper.IDL.fmclasses.are.fields + .filter(function(f) {return (!f.virtual && f.name != 'marc')}) + .map(function(f) {return f.name}); + params = [authtoken, {id : records}, {select : {are : fields}}] } fieldmapper.standardRequest( @@ -641,7 +645,7 @@ function vlLoadMatchUI(recId) { /* ui mangling */ displayGlobalDiv('vl-match-div'); - resetVlMatchGridLayout(); + resetVlMatchGridLayout(currentType); currentMatchedRecords = recs; vlMatchGrid.setStructure(vlMatchGridLayout); @@ -1148,6 +1152,12 @@ function vlToggleQueueGridSelect() { var handleRetrieveRecords = function() { buildRecordGrid(currentType); + + if (currentType.match(/auth/)) { + openils.Util.hide('vl-queue-summary-import-item-summary'); + } else { + openils.Util.show('vl-queue-summary-import-item-summary', 'table-body'); + } vlFetchQueueSummary(currentQueueId, currentType, function(summary) { dojo.byId('vl-queue-summary-name').innerHTML = summary.queue.name();