From da70b778268afff19c3dd4f2e9e4c9e85cfeb7e3 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Thu, 16 Jun 2011 09:50:25 -0400 Subject: [PATCH] Merging branch user/dbs/lp797304_lp797307 Signed-off-by: Mike Rylander Cherry picking the following: commit 7be2a32620587af27de4316e42a308e913e9314c Author: Dan Scott Date: Tue Jun 14 22:35:26 2011 -0400 Add a stored procedure to reingest problematic URIs Assuming that href = label in asset.uri is a red enough flag for the problem in LP 797307 where the perfectly good label was being thrown away, reingest the unique set of records connected with such asset.uri rows. Signed-off-by: Dan Scott commit 39fe0c4a8a3074ff4c1e427ad9f00a0760bac260 Author: Dan Scott Date: Tue Jun 14 21:54:51 2011 -0400 Add unwrapped upgrade script for biblio.extract_located_uris See XXXX.biblio.extract_located_uris.sql for the upgrade script, waiting to be signed off and committed. Signed-off-by: Dan Scott create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.biblio.extract_located_uris.sql commit 7e6d2bfd7e9a5cbe57e4aacdf0b4b015a566883d Author: Dan Scott Date: Tue Jun 14 21:34:15 2011 -0400 Avoid duplicate row issues in biblio.extract_located_uris As there is currently no unique constraint on the non-ID columns of asset.uri, ensure that we only select a single ID from the tables until we dedupe the tables. Use an ORDER BY to grab the matching row with the lowest ID so we can clear out the rows with higher IDs. Signed-off-by: Dan Scott commit e79a805299ec734a29ee5bfa77dc74bc1fd208cd Author: Dan Scott Date: Tue Jun 14 14:02:49 2011 -0400 Fix LP 797304 and 797307 - asset.uri parsing bugs Per 797304: one would expect asset.uri.label to be populated with the value of 856 $y, however, it was getting populated with 856 $u - perhaps as a result of the ordering of array entries in XPATH() not matching the XPath expression. Instead of including 856 $u in the XPath expression for uri_label, just assign its value to uri_label if uri_label is otherwise NULL. Per 797307: biblio.extract_located_uris() tries to reuse an existing active asset.uri entry rather than create a new row; however, it does not handle the case where there is no public note aka "use restriction" and will end up creating a duplicate row every time the record is reingested. Teach it to handle NULL values properly. Signed-off-by: Dan Scott --- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 36 ++++-- .../upgrade/XXXX.biblio.extract_located_uris.sql | 137 +++++++++++++++++++++ 2 files changed, 164 insertions(+), 9 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.biblio.extract_located_uris.sql diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index 1ccbd573ba..17a238332d 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -853,9 +853,13 @@ BEGIN uri_xml := uris[i]; uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1]; - uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()|//*[@code="u"]/text()',uri_xml))[1]; + uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1]; uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1]; - CONTINUE WHEN uri_href IS NULL OR uri_label IS NULL; + + IF uri_label IS NULL THEN + uri_label := uri_href; + END IF; + CONTINUE WHEN uri_href IS NULL; -- Get the distinct list of libraries wanting to use SELECT ARRAY_ACCUM( @@ -875,13 +879,27 @@ BEGIN IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN -- look for a matching uri - SELECT id INTO uri_id FROM asset.uri WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active; - IF NOT FOUND THEN -- create one - INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use); - IF uri_use IS NULL THEN - SELECT id INTO uri_id FROM asset.uri WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active; - ELSE - SELECT id INTO uri_id FROM asset.uri WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active; + IF uri_use IS NULL THEN + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active + ORDER BY id LIMIT 1; + IF NOT FOUND THEN -- create one + INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use); + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active; + END IF; + ELSE + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active + ORDER BY id LIMIT 1; + IF NOT FOUND THEN -- create one + INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use); + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active; END IF; END IF; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.biblio.extract_located_uris.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.biblio.extract_located_uris.sql new file mode 100644 index 0000000000..434443df05 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.biblio.extract_located_uris.sql @@ -0,0 +1,137 @@ +CREATE OR REPLACE FUNCTION biblio.extract_located_uris( bib_id BIGINT, marcxml TEXT, editor_id INT ) RETURNS VOID AS $func$ +DECLARE + uris TEXT[]; + uri_xml TEXT; + uri_label TEXT; + uri_href TEXT; + uri_use TEXT; + uri_owner_list TEXT[]; + uri_owner TEXT; + uri_owner_id INT; + uri_id INT; + uri_cn_id INT; + uri_map_id INT; +BEGIN + + -- Clear any URI mappings and call numbers for this bib. + -- This leads to acn / auricnm inflation, but also enables + -- old acn/auricnm's to go away and for bibs to be deleted. + FOR uri_cn_id IN SELECT id FROM asset.call_number WHERE record = bib_id AND label = '##URI##' AND NOT deleted LOOP + DELETE FROM asset.uri_call_number_map WHERE call_number = uri_cn_id; + DELETE FROM asset.call_number WHERE id = uri_cn_id; + END LOOP; + + uris := oils_xpath('//*[@tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]',marcxml); + IF ARRAY_UPPER(uris,1) > 0 THEN + FOR i IN 1 .. ARRAY_UPPER(uris, 1) LOOP + -- First we pull info out of the 856 + uri_xml := uris[i]; + + uri_href := (oils_xpath('//*[@code="u"]/text()',uri_xml))[1]; + uri_label := (oils_xpath('//*[@code="y"]/text()|//*[@code="3"]/text()',uri_xml))[1]; + uri_use := (oils_xpath('//*[@code="z"]/text()|//*[@code="2"]/text()|//*[@code="n"]/text()',uri_xml))[1]; + + IF uri_label IS NULL THEN + uri_label := uri_href; + END IF; + CONTINUE WHEN uri_href IS NULL; + + -- Get the distinct list of libraries wanting to use + SELECT ARRAY_ACCUM( + DISTINCT REGEXP_REPLACE( + x, + $re$^.*?\((\w+)\).*$$re$, + E'\\1' + ) + ) INTO uri_owner_list + FROM UNNEST( + oils_xpath( + '//*[@code="9"]/text()|//*[@code="w"]/text()|//*[@code="n"]/text()', + uri_xml + ) + )x; + + IF ARRAY_UPPER(uri_owner_list,1) > 0 THEN + + -- look for a matching uri + IF uri_use IS NULL THEN + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active + ORDER BY id LIMIT 1; + IF NOT FOUND THEN -- create one + INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use); + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction IS NULL AND active; + END IF; + ELSE + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active + ORDER BY id LIMIT 1; + IF NOT FOUND THEN -- create one + INSERT INTO asset.uri (label, href, use_restriction) VALUES (uri_label, uri_href, uri_use); + SELECT id INTO uri_id + FROM asset.uri + WHERE label = uri_label AND href = uri_href AND use_restriction = uri_use AND active; + END IF; + END IF; + + FOR j IN 1 .. ARRAY_UPPER(uri_owner_list, 1) LOOP + uri_owner := uri_owner_list[j]; + + SELECT id INTO uri_owner_id FROM actor.org_unit WHERE shortname = uri_owner; + CONTINUE WHEN NOT FOUND; + + -- we need a call number to link through + SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted; + IF NOT FOUND THEN + INSERT INTO asset.call_number (owning_lib, record, create_date, edit_date, creator, editor, label) + VALUES (uri_owner_id, bib_id, 'now', 'now', editor_id, editor_id, '##URI##'); + SELECT id INTO uri_cn_id FROM asset.call_number WHERE owning_lib = uri_owner_id AND record = bib_id AND label = '##URI##' AND NOT deleted; + END IF; + + -- now, link them if they're not already + SELECT id INTO uri_map_id FROM asset.uri_call_number_map WHERE call_number = uri_cn_id AND uri = uri_id; + IF NOT FOUND THEN + INSERT INTO asset.uri_call_number_map (call_number, uri) VALUES (uri_cn_id, uri_id); + END IF; + + END LOOP; + + END IF; + + END LOOP; + END IF; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION biblio.reingest_uris() RETURNS VOID AS $func$ +DECLARE + rec_id BIGINT; +BEGIN + -- Get the distinct set of record IDs that need to be reingested + -- (assuming that href = label is a reasonable red flag) + FOR rec_id IN SELECT rec_uris.id FROM ( + SELECT acn.record AS id + FROM asset.call_number acn + INNER JOIN asset.uri_call_number_map auricnm ON auricnm.call_number = acn.id + INNER JOIN asset.uri auri ON auri.id = auricnm.uri + WHERE auri.href = auri.label + GROUP BY acn.record + ORDER BY acn.record + ) AS rec_uris + LOOP + -- Reingest the offending records + PERFORM biblio.extract_located_uris(rec_id, bre.marc, 1) + FROM biblio.record_entry bre + WHERE bre.id = rec_id; + END LOOP; +END; +$func$ LANGUAGE PLPGSQL; + +-- Kick off the reingest; this may take a while +SELECT biblio.reingest_uris(); -- 2.11.0