From: Ben Shum <bshum@biblio.org> Date: Tue, 3 Feb 2015 20:54:05 +0000 (-0500) Subject: LP#1414112: Stamping upgrade script for spaces in record attr values X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=9452d114f63b2340ced3c301a05bf2c17f4d53df;p=evergreen%2Fmasslnc.git LP#1414112: Stamping upgrade script for spaces in record attr values Signed-off-by: Ben Shum <bshum@biblio.org> --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index aeda1b0016..b132005e55 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0903', :eg_version); -- dyrcona/kmlussier/bshum +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0904', :eg_version); -- miker/gmcharlt/csharp/bshum CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0904.schema.allow_spaces_as_ff_attr_values.sql b/Open-ILS/src/sql/Pg/upgrade/0904.schema.allow_spaces_as_ff_attr_values.sql new file mode 100644 index 0000000000..5e306b844a --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0904.schema.allow_spaces_as_ff_attr_values.sql @@ -0,0 +1,250 @@ +-- Evergreen DB patch 0904.schema.allow_spaces_as_ff_attr_values.sql +-- +-- LP#1414112 - don't over-normalize record attribute values to +-- exclude all values that contain only blanks +-- +BEGIN; + +-- check whether patch can be applied +SELECT evergreen.upgrade_deps_block_check('0904', :eg_version); + +-- The code for "uncoded" audience is a space, but upgraded DBs may have a blank. +UPDATE config.coded_value_map + SET code = ' ' + WHERE ctype = 'audience' + AND code = '' + AND ( + SELECT COUNT(*) + FROM config.coded_value_map + WHERE ctype = 'audience' + AND code = ' ') = 0; + +WITH ccvm AS ( + SELECT id + FROM config.coded_value_map + WHERE ctype = 'audience' + AND code = ' ' +) UPDATE metabib.record_attr_vector_list + SET vlist = vlist + intset(ccvm.id) + FROM ccvm + WHERE source IN ( + SELECT record + FROM metabib.real_full_rec + WHERE tag = '008' + AND substring(value,23,1) = ' ' + ); + +CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$ +DECLARE + transformed_xml TEXT; + rmarc TEXT := prmarc; + tmp_val TEXT; + prev_xfrm TEXT; + normalizer RECORD; + xfrm config.xml_transform%ROWTYPE; + attr_vector INT[] := '{}'::INT[]; + attr_vector_tmp INT[]; + attr_list TEXT[] := pattr_list; + attr_value TEXT[]; + norm_attr_value TEXT[]; + tmp_xml TEXT; + attr_def config.record_attr_definition%ROWTYPE; + ccvm_row config.coded_value_map%ROWTYPE; +BEGIN + + IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete + SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition; + END IF; + + IF rmarc IS NULL THEN + SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid; + END IF; + + FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP + + attr_value := '{}'::TEXT[]; + norm_attr_value := '{}'::TEXT[]; + attr_vector_tmp := '{}'::INT[]; + + SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1; + + -- tag+sf attrs only support SVF + IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection + SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value + FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x + WHERE record = rid + AND tag LIKE attr_def.tag + AND CASE + WHEN attr_def.sf_list IS NOT NULL + THEN POSITION(subfield IN attr_def.sf_list) > 0 + ELSE TRUE + END + GROUP BY tag + ORDER BY tag + LIMIT 1; + + ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field + attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field); + + IF NOT attr_def.multi THEN + attr_value := ARRAY[attr_value[1]]; + END IF; + + ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression + + SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(rmarc,xfrm.xslt); + ELSE + transformed_xml := rmarc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + IF xfrm.name IS NULL THEN + -- just grab the marcxml (empty) transform + SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1; + prev_xfrm := xfrm.name; + END IF; + + FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP + tmp_val := oils_xpath_string( + '//*', + tmp_xml, + COALESCE(attr_def.joiner,' '), + ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] + ); + IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN + attr_value := attr_value || tmp_val; + EXIT WHEN NOT attr_def.multi; + END IF; + END LOOP; + + ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map + SELECT ARRAY_AGG(m.value) INTO attr_value + FROM vandelay.marc21_physical_characteristics(rmarc) v + LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value) + WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '') + AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) ); + + IF NOT attr_def.multi THEN + attr_value := ARRAY[attr_value[1]]; + END IF; + + END IF; + + -- apply index normalizers to attr_value + FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.record_attr_index_norm_map m ON (m.norm = n.id) + WHERE attr = attr_def.name + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + COALESCE( quote_literal( tmp_val ), 'NULL' ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO tmp_val; + + END LOOP; + IF tmp_val IS NOT NULL AND tmp_val <> '' THEN + -- note that a string that contains only blanks + -- is a valid value for some attributes + norm_attr_value := norm_attr_value || tmp_val; + END IF; + END LOOP; + + IF attr_def.filter THEN + -- Create unknown uncontrolled values and find the IDs of the values + IF ccvm_row.id IS NULL THEN + FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP + IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN + BEGIN -- use subtransaction to isolate unique constraint violations + INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val ); + EXCEPTION WHEN unique_violation THEN END; + END IF; + END LOOP; + + SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value ); + ELSE + SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value ); + END IF; + + -- Add the new value to the vector + attr_vector := attr_vector || attr_vector_tmp; + END IF; + + IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN + DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name; + INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]); + END IF; + + END LOOP; + +/* We may need to rewrite the vlist to contain + the intersection of new values for requested + attrs and old values for ignored attrs. To + do this, we take the old attr vlist and + subtract any values that are valid for the + requested attrs, and then add back the new + set of attr values. */ + + IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN + SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid; + SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list); + attr_vector := attr_vector || attr_vector_tmp; + END IF; + + -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite + -- attributes can depend on earlier ones. + PERFORM metabib.compile_composite_attr_cache_init(); + FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP + + FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP + + tmp_val := metabib.compile_composite_attr( ccvm_row.id ); + CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do + + IF attr_def.filter THEN + IF attr_vector @@ tmp_val::query_int THEN + attr_vector = attr_vector + intset(ccvm_row.id); + EXIT WHEN NOT attr_def.multi; + END IF; + END IF; + + IF attr_def.sorter THEN + IF attr_vector @@ tmp_val THEN + DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name; + INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code); + END IF; + END IF; + + END LOOP; + + END LOOP; + + IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN + IF rdeleted THEN -- initial insert OR revivication + DELETE FROM metabib.record_attr_vector_list WHERE source = rid; + INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector); + ELSE + UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid; + END IF; + END IF; + +END; + +$func$ LANGUAGE PLPGSQL; + +COMMIT; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.allow_spaces_as_ff_attr_values.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.allow_spaces_as_ff_attr_values.sql deleted file mode 100644 index 4ea3cc7136..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.allow_spaces_as_ff_attr_values.sql +++ /dev/null @@ -1,250 +0,0 @@ --- Evergreen DB patch XXXX.schema.allow_spaces_as_ff_attr_values.sql --- --- LP#1414112 - don't over-normalize record attribute values to --- exclude all values that contain only blanks --- -BEGIN; - --- check whether patch can be applied -SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); - --- The code for "uncoded" audience is a space, but upgraded DBs may have a blank. -UPDATE config.coded_value_map - SET code = ' ' - WHERE ctype = 'audience' - AND code = '' - AND ( - SELECT COUNT(*) - FROM config.coded_value_map - WHERE ctype = 'audience' - AND code = ' ') = 0; - -WITH ccvm AS ( - SELECT id - FROM config.coded_value_map - WHERE ctype = 'audience' - AND code = ' ' -) UPDATE metabib.record_attr_vector_list - SET vlist = vlist + intset(ccvm.id) - FROM ccvm - WHERE source IN ( - SELECT record - FROM metabib.real_full_rec - WHERE tag = '008' - AND substring(value,23,1) = ' ' - ); - -CREATE OR REPLACE FUNCTION metabib.reingest_record_attributes (rid BIGINT, pattr_list TEXT[] DEFAULT NULL, prmarc TEXT DEFAULT NULL, rdeleted BOOL DEFAULT TRUE) RETURNS VOID AS $func$ -DECLARE - transformed_xml TEXT; - rmarc TEXT := prmarc; - tmp_val TEXT; - prev_xfrm TEXT; - normalizer RECORD; - xfrm config.xml_transform%ROWTYPE; - attr_vector INT[] := '{}'::INT[]; - attr_vector_tmp INT[]; - attr_list TEXT[] := pattr_list; - attr_value TEXT[]; - norm_attr_value TEXT[]; - tmp_xml TEXT; - attr_def config.record_attr_definition%ROWTYPE; - ccvm_row config.coded_value_map%ROWTYPE; -BEGIN - - IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete - SELECT ARRAY_AGG(name) INTO attr_list FROM config.record_attr_definition; - END IF; - - IF rmarc IS NULL THEN - SELECT marc INTO rmarc FROM biblio.record_entry WHERE id = rid; - END IF; - - FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE NOT composite AND name = ANY( attr_list ) ORDER BY format LOOP - - attr_value := '{}'::TEXT[]; - norm_attr_value := '{}'::TEXT[]; - attr_vector_tmp := '{}'::INT[]; - - SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1; - - -- tag+sf attrs only support SVF - IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection - SELECT ARRAY[ARRAY_TO_STRING(ARRAY_AGG(value), COALESCE(attr_def.joiner,' '))] INTO attr_value - FROM (SELECT * FROM metabib.full_rec ORDER BY tag, subfield) AS x - WHERE record = rid - AND tag LIKE attr_def.tag - AND CASE - WHEN attr_def.sf_list IS NOT NULL - THEN POSITION(subfield IN attr_def.sf_list) > 0 - ELSE TRUE - END - GROUP BY tag - ORDER BY tag - LIMIT 1; - - ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field - attr_value := vandelay.marc21_extract_fixed_field_list(rmarc, attr_def.fixed_field); - - IF NOT attr_def.multi THEN - attr_value := ARRAY[attr_value[1]]; - END IF; - - ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression - - SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format; - - -- See if we can skip the XSLT ... it's expensive - IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN - -- Can't skip the transform - IF xfrm.xslt <> '---' THEN - transformed_xml := oils_xslt_process(rmarc,xfrm.xslt); - ELSE - transformed_xml := rmarc; - END IF; - - prev_xfrm := xfrm.name; - END IF; - - IF xfrm.name IS NULL THEN - -- just grab the marcxml (empty) transform - SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1; - prev_xfrm := xfrm.name; - END IF; - - FOR tmp_xml IN SELECT oils_xpath(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP - tmp_val := oils_xpath_string( - '//*', - tmp_xml, - COALESCE(attr_def.joiner,' '), - ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] - ); - IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN - attr_value := attr_value || tmp_val; - EXIT WHEN NOT attr_def.multi; - END IF; - END LOOP; - - ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map - SELECT ARRAY_AGG(m.value) INTO attr_value - FROM vandelay.marc21_physical_characteristics(rmarc) v - LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value) - WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '') - AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) ); - - IF NOT attr_def.multi THEN - attr_value := ARRAY[attr_value[1]]; - END IF; - - END IF; - - -- apply index normalizers to attr_value - FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP - FOR normalizer IN - SELECT n.func AS func, - n.param_count AS param_count, - m.params AS params - FROM config.index_normalizer n - JOIN config.record_attr_index_norm_map m ON (m.norm = n.id) - WHERE attr = attr_def.name - ORDER BY m.pos LOOP - EXECUTE 'SELECT ' || normalizer.func || '(' || - COALESCE( quote_literal( tmp_val ), 'NULL' ) || - CASE - WHEN normalizer.param_count > 0 - THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') - ELSE '' - END || - ')' INTO tmp_val; - - END LOOP; - IF tmp_val IS NOT NULL AND tmp_val <> '' THEN - -- note that a string that contains only blanks - -- is a valid value for some attributes - norm_attr_value := norm_attr_value || tmp_val; - END IF; - END LOOP; - - IF attr_def.filter THEN - -- Create unknown uncontrolled values and find the IDs of the values - IF ccvm_row.id IS NULL THEN - FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP - IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN - BEGIN -- use subtransaction to isolate unique constraint violations - INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val ); - EXCEPTION WHEN unique_violation THEN END; - END IF; - END LOOP; - - SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value ); - ELSE - SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value ); - END IF; - - -- Add the new value to the vector - attr_vector := attr_vector || attr_vector_tmp; - END IF; - - IF attr_def.sorter AND norm_attr_value[1] IS NOT NULL THEN - DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name; - INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, norm_attr_value[1]); - END IF; - - END LOOP; - -/* We may need to rewrite the vlist to contain - the intersection of new values for requested - attrs and old values for ignored attrs. To - do this, we take the old attr vlist and - subtract any values that are valid for the - requested attrs, and then add back the new - set of attr values. */ - - IF ARRAY_LENGTH(pattr_list, 1) > 0 THEN - SELECT vlist INTO attr_vector_tmp FROM metabib.record_attr_vector_list WHERE source = rid; - SELECT attr_vector_tmp - ARRAY_AGG(id::INT) INTO attr_vector_tmp FROM metabib.full_attr_id_map WHERE attr = ANY (pattr_list); - attr_vector := attr_vector || attr_vector_tmp; - END IF; - - -- On to composite attributes, now that the record attrs have been pulled. Processed in name order, so later composite - -- attributes can depend on earlier ones. - PERFORM metabib.compile_composite_attr_cache_init(); - FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE composite AND name = ANY( attr_list ) ORDER BY name LOOP - - FOR ccvm_row IN SELECT * FROM config.coded_value_map c WHERE c.ctype = attr_def.name ORDER BY value LOOP - - tmp_val := metabib.compile_composite_attr( ccvm_row.id ); - CONTINUE WHEN tmp_val IS NULL OR tmp_val = ''; -- nothing to do - - IF attr_def.filter THEN - IF attr_vector @@ tmp_val::query_int THEN - attr_vector = attr_vector + intset(ccvm_row.id); - EXIT WHEN NOT attr_def.multi; - END IF; - END IF; - - IF attr_def.sorter THEN - IF attr_vector @@ tmp_val THEN - DELETE FROM metabib.record_sorter WHERE source = rid AND attr = attr_def.name; - INSERT INTO metabib.record_sorter (source, attr, value) VALUES (rid, attr_def.name, ccvm_row.code); - END IF; - END IF; - - END LOOP; - - END LOOP; - - IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN - IF rdeleted THEN -- initial insert OR revivication - DELETE FROM metabib.record_attr_vector_list WHERE source = rid; - INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector); - ELSE - UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid; - END IF; - END IF; - -END; - -$func$ LANGUAGE PLPGSQL; - -COMMIT;