From: Lebbeous Fogle-Weekley Date: Mon, 11 Jul 2011 18:23:30 +0000 (-0400) Subject: Stamping Mike's upgrade script for authority control sets X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=49081017012efb43fbc3114058cee53f629aab9c;p=evergreen%2Fmasslnc.git Stamping Mike's upgrade script for authority control sets Signed-off-by: Lebbeous Fogle-Weekley --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 294eb0e055..96277dbb4e 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -86,7 +86,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0574', :eg_version); -- berick +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0575', :eg_version); -- miker/senator CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0575.schema.authority-control-sets.sql b/Open-ILS/src/sql/Pg/upgrade/0575.schema.authority-control-sets.sql new file mode 100644 index 0000000000..c8b74dc503 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0575.schema.authority-control-sets.sql @@ -0,0 +1,501 @@ +-- Evergreen DB patch XXXX.schema.authority-control-sets.sql +-- +-- Schema upgrade to add Authority Control Set functionality +-- +BEGIN; + + +-- check whether patch can be applied +SELECT evergreen.upgrade_deps_block_check('0575', :eg_version); + +CREATE TABLE authority.control_set ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL UNIQUE, -- i18n + description TEXT -- i18n +); + +CREATE TABLE authority.control_set_authority_field ( + id SERIAL PRIMARY KEY, + main_entry INT REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + tag CHAR(3) NOT NULL, + sf_list TEXT NOT NULL, + name TEXT NOT NULL, -- i18n + description TEXT -- i18n +); + +CREATE TABLE authority.control_set_bib_field ( + id SERIAL PRIMARY KEY, + authority_field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + tag CHAR(3) NOT NULL +); + +CREATE TABLE authority.thesaurus ( + code TEXT PRIMARY KEY, -- MARC21 thesaurus code + control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + name TEXT NOT NULL UNIQUE, -- i18n + description TEXT -- i18n +); + +CREATE TABLE authority.browse_axis ( + code TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL, -- i18n + sorter TEXT REFERENCES config.record_attr_definition (name) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + description TEXT +); + +CREATE TABLE authority.browse_axis_authority_field_map ( + id SERIAL PRIMARY KEY, + axis TEXT NOT NULL REFERENCES authority.browse_axis (code) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED +); + +ALTER TABLE authority.record_entry ADD COLUMN control_set INT REFERENCES authority.control_set (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE authority.rec_descriptor DROP COLUMN char_encoding, ADD COLUMN encoding_level TEXT, ADD COLUMN thesaurus TEXT; + +CREATE INDEX authority_full_rec_value_index ON authority.full_rec (value); +CREATE OR REPLACE RULE protect_authority_rec_delete AS ON DELETE TO authority.record_entry DO INSTEAD (UPDATE authority.record_entry SET deleted = TRUE WHERE OLD.id = authority.record_entry.id; DELETE FROM authority.full_rec WHERE record = OLD.id); + +CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT, no_thesaurus BOOL ) RETURNS TEXT AS $func$ +DECLARE + acsaf authority.control_set_authority_field%ROWTYPE; + tag_used TEXT; + sf TEXT; + thes_code TEXT; + cset INT; + heading_text TEXT; + tmp_text TEXT; +BEGIN + thes_code := vandelay.marc21_extract_fixed_field(marcxml,'Subj'); + IF thes_code IS NULL THEN + thes_code := '|'; + END IF; + + SELECT control_set INTO cset FROM authority.thesaurus WHERE code = thes_code; + IF NOT FOUND THEN + cset = 1; + END IF; + + heading_text := ''; + FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset AND main_entry IS NULL LOOP + tag_used := acsaf.tag; + FOR sf IN SELECT * FROM regexp_split_to_table(acsaf.sf_list,'') LOOP + tmp_text := oils_xpath_string('//*[@tag="'||tag_used||'"]/*[@code="'||sf||'"]', marcxml); + IF tmp_text IS NOT NULL AND tmp_text <> '' THEN + heading_text := heading_text || E'\u2021' || sf || ' ' || tmp_text; + END IF; + END LOOP; + EXIT WHEN heading_text <> ''; + END LOOP; + + IF thes_code = 'z' THEN + thes_code := oils_xpath_string('//*[@tag="040"]/*[@code="f"][1]', marcxml); + END IF; + + IF heading_text <> '' THEN + IF no_thesaurus IS TRUE THEN + heading_text := tag_used || ' ' || public.naco_normalize(heading_text); + ELSE + heading_text := tag_used || '_' || thes_code || ' ' || public.naco_normalize(heading_text); + END IF; + ELSE + heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml); + END IF; + + RETURN heading_text; +END; +$func$ LANGUAGE PLPGSQL IMMUTABLE; + +CREATE OR REPLACE FUNCTION authority.simple_normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$ + SELECT authority.normalize_heading($1, TRUE); +$func$ LANGUAGE SQL IMMUTABLE; + +CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$ + SELECT authority.normalize_heading($1, FALSE); +$func$ LANGUAGE SQL IMMUTABLE; + +CREATE OR REPLACE VIEW authority.tracing_links AS + SELECT main.record AS record, + main.id AS main_id, + main.tag AS main_tag, + oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value, + substr(link.value,1,1) AS relationship, + substr(link.value,2,1) AS use_restriction, + substr(link.value,3,1) AS deprecation, + substr(link.value,4,1) AS display_restriction, + link.id AS link_id, + link.tag AS link_tag, + oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value, + authority.normalize_heading(are.marc) AS normalized_main_value + FROM authority.full_rec main + JOIN authority.record_entry are ON (main.record = are.id) + JOIN authority.control_set_authority_field main_entry + ON (main_entry.tag = main.tag + AND main_entry.main_entry IS NULL + AND main.subfield = 'a' ) + JOIN authority.control_set_authority_field sub_entry + ON (main_entry.id = sub_entry.main_entry) + JOIN authority.full_rec link + ON (link.record = main.record + AND link.tag = sub_entry.tag + AND link.subfield = 'w' ); + +CREATE OR REPLACE FUNCTION authority.generate_overlay_template (source_xml TEXT) RETURNS TEXT AS $f$ +DECLARE + cset INT; + main_entry authority.control_set_authority_field%ROWTYPE; + bib_field authority.control_set_bib_field%ROWTYPE; + auth_id INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', source_xml)::INT; + replace_data XML[] DEFAULT '{}'::XML[]; + replace_rules TEXT[] DEFAULT '{}'::TEXT[]; + auth_field XML[]; +BEGIN + IF auth_id IS NULL THEN + RETURN NULL; + END IF; + + -- Default to the LoC controll set + SELECT COALESCE(control_set,1) INTO cset FROM authority.record_entry WHERE id = auth_id; + + FOR main_entry IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP + auth_field := XPATH('//*[@tag="'||main_entry.tag||'"][1]',source_xml::XML); + IF ARRAY_LENGTH(auth_field,1) > 0 THEN + FOR bib_field IN SELECT * FROM authority.control_set_bib_field WHERE authority_field = main_entry.id LOOP + replace_data := replace_data || XMLELEMENT( name datafield, XMLATTRIBUTES(bib_field.tag AS tag), XPATH('//*[local-name()="subfield"]',auth_field[1])::XML[]); + replace_rules := replace_rules || ( bib_field.tag || main_entry.sf_list || E'[0~\\)' || auth_id || '$]' ); + END LOOP; + EXIT; + END IF; + END LOOP; + + RETURN XMLELEMENT( + name record, + XMLATTRIBUTES('http://www.loc.gov/MARC21/slim' AS xmlns), + XMLELEMENT( name leader, '00881nam a2200193 4500'), + replace_data, + XMLELEMENT( + name datafield, + XMLATTRIBUTES( '905' AS tag, ' ' AS ind1, ' ' AS ind2), + XMLELEMENT( + name subfield, + XMLATTRIBUTES('r' AS code), + ARRAY_TO_STRING(replace_rules,',') + ) + ) + )::TEXT; +END; +$f$ STABLE LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION authority.generate_overlay_template ( BIGINT ) RETURNS TEXT AS $func$ + SELECT authority.generate_overlay_template( marc ) FROM authority.record_entry WHERE id = $1; +$func$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT, field TEXT, force_add INT ) RETURNS TEXT AS $_$ + + use MARC::Record; + use MARC::File::XML (BinaryEncoding => 'UTF-8'); + use MARC::Charset; + use strict; + + MARC::Charset->assume_unicode(1); + + my $target_xml = shift; + my $source_xml = shift; + my $field_spec = shift; + my $force_add = shift || 0; + + my $target_r = MARC::Record->new_from_xml( $target_xml ); + my $source_r = MARC::Record->new_from_xml( $source_xml ); + + return $target_xml unless ($target_r && $source_r); + + my @field_list = split(',', $field_spec); + + my %fields; + for my $f (@field_list) { + $f =~ s/^\s*//; $f =~ s/\s*$//; + if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { + my $field = $1; + $field =~ s/\s+//; + my $sf = $2; + $sf =~ s/\s+//; + my $match = $3; + $match =~ s/^\s*//; $match =~ s/\s*$//; + $fields{$field} = { sf => [ split('', $sf) ] }; + if ($match) { + my ($msf,$mre) = split('~', $match); + if (length($msf) > 0 and length($mre) > 0) { + $msf =~ s/^\s*//; $msf =~ s/\s*$//; + $mre =~ s/^\s*//; $mre =~ s/\s*$//; + $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; + } + } + } + } + + for my $f ( keys %fields) { + if ( @{$fields{$f}{sf}} ) { + for my $from_field ($source_r->field( $f )) { + my @tos = $target_r->field( $f ); + if (!@tos) { + next if (exists($fields{$f}{match}) and !$force_add); + my @new_fields = map { $_->clone } $source_r->field( $f ); + $target_r->insert_fields_ordered( @new_fields ); + } else { + for my $to_field (@tos) { + if (exists($fields{$f}{match})) { + next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf})); + } + my @new_sf = map { ($_ => $from_field->subfield($_)) } grep { defined($from_field->subfield($_)) } @{$fields{$f}{sf}}; + $to_field->add_subfields( @new_sf ); + } + } + } + } else { + my @new_fields = map { $_->clone } $source_r->field( $f ); + $target_r->insert_fields_ordered( @new_fields ); + } + } + + $target_xml = $target_r->as_xml_record; + $target_xml =~ s/^<\?.+?\?>$//mo; + $target_xml =~ s/\n//sgo; + $target_xml =~ s/>\s+ '' THEN - heading_text := heading_text || E'\u2021' || sf || ' ' || tmp_text; - END IF; - END LOOP; - EXIT WHEN heading_text <> ''; - END LOOP; - - IF thes_code = 'z' THEN - thes_code := oils_xpath_string('//*[@tag="040"]/*[@code="f"][1]', marcxml); - END IF; - - IF heading_text <> '' THEN - IF no_thesaurus IS TRUE THEN - heading_text := tag_used || ' ' || public.naco_normalize(heading_text); - ELSE - heading_text := tag_used || '_' || thes_code || ' ' || public.naco_normalize(heading_text); - END IF; - ELSE - heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml); - END IF; - - RETURN heading_text; -END; -$func$ LANGUAGE PLPGSQL IMMUTABLE; - -CREATE OR REPLACE FUNCTION authority.simple_normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$ - SELECT authority.normalize_heading($1, TRUE); -$func$ LANGUAGE SQL IMMUTABLE; - -CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$ - SELECT authority.normalize_heading($1, FALSE); -$func$ LANGUAGE SQL IMMUTABLE; - -CREATE OR REPLACE VIEW authority.tracing_links AS - SELECT main.record AS record, - main.id AS main_id, - main.tag AS main_tag, - oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value, - substr(link.value,1,1) AS relationship, - substr(link.value,2,1) AS use_restriction, - substr(link.value,3,1) AS deprecation, - substr(link.value,4,1) AS display_restriction, - link.id AS link_id, - link.tag AS link_tag, - oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value, - authority.normalize_heading(are.marc) AS normalized_main_value - FROM authority.full_rec main - JOIN authority.record_entry are ON (main.record = are.id) - JOIN authority.control_set_authority_field main_entry - ON (main_entry.tag = main.tag - AND main_entry.main_entry IS NULL - AND main.subfield = 'a' ) - JOIN authority.control_set_authority_field sub_entry - ON (main_entry.id = sub_entry.main_entry) - JOIN authority.full_rec link - ON (link.record = main.record - AND link.tag = sub_entry.tag - AND link.subfield = 'w' ); - -CREATE OR REPLACE FUNCTION authority.generate_overlay_template (source_xml TEXT) RETURNS TEXT AS $f$ -DECLARE - cset INT; - main_entry authority.control_set_authority_field%ROWTYPE; - bib_field authority.control_set_bib_field%ROWTYPE; - auth_id INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', source_xml)::INT; - replace_data XML[] DEFAULT '{}'::XML[]; - replace_rules TEXT[] DEFAULT '{}'::TEXT[]; - auth_field XML[]; -BEGIN - IF auth_id IS NULL THEN - RETURN NULL; - END IF; - - -- Default to the LoC controll set - SELECT COALESCE(control_set,1) INTO cset FROM authority.record_entry WHERE id = auth_id; - - FOR main_entry IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP - auth_field := XPATH('//*[@tag="'||main_entry.tag||'"][1]',source_xml::XML); - IF ARRAY_LENGTH(auth_field,1) > 0 THEN - FOR bib_field IN SELECT * FROM authority.control_set_bib_field WHERE authority_field = main_entry.id LOOP - replace_data := replace_data || XMLELEMENT( name datafield, XMLATTRIBUTES(bib_field.tag AS tag), XPATH('//*[local-name()="subfield"]',auth_field[1])::XML[]); - replace_rules := replace_rules || ( bib_field.tag || main_entry.sf_list || E'[0~\\)' || auth_id || '$]' ); - END LOOP; - EXIT; - END IF; - END LOOP; - - RETURN XMLELEMENT( - name record, - XMLATTRIBUTES('http://www.loc.gov/MARC21/slim' AS xmlns), - XMLELEMENT( name leader, '00881nam a2200193 4500'), - replace_data, - XMLELEMENT( - name datafield, - XMLATTRIBUTES( '905' AS tag, ' ' AS ind1, ' ' AS ind2), - XMLELEMENT( - name subfield, - XMLATTRIBUTES('r' AS code), - ARRAY_TO_STRING(replace_rules,',') - ) - ) - )::TEXT; -END; -$f$ STABLE LANGUAGE PLPGSQL; - -CREATE OR REPLACE FUNCTION authority.generate_overlay_template ( BIGINT ) RETURNS TEXT AS $func$ - SELECT authority.generate_overlay_template( marc ) FROM authority.record_entry WHERE id = $1; -$func$ LANGUAGE SQL; - -CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT, field TEXT, force_add INT ) RETURNS TEXT AS $_$ - - use MARC::Record; - use MARC::File::XML (BinaryEncoding => 'UTF-8'); - use MARC::Charset; - use strict; - - MARC::Charset->assume_unicode(1); - - my $target_xml = shift; - my $source_xml = shift; - my $field_spec = shift; - my $force_add = shift || 0; - - my $target_r = MARC::Record->new_from_xml( $target_xml ); - my $source_r = MARC::Record->new_from_xml( $source_xml ); - - return $target_xml unless ($target_r && $source_r); - - my @field_list = split(',', $field_spec); - - my %fields; - for my $f (@field_list) { - $f =~ s/^\s*//; $f =~ s/\s*$//; - if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { - my $field = $1; - $field =~ s/\s+//; - my $sf = $2; - $sf =~ s/\s+//; - my $match = $3; - $match =~ s/^\s*//; $match =~ s/\s*$//; - $fields{$field} = { sf => [ split('', $sf) ] }; - if ($match) { - my ($msf,$mre) = split('~', $match); - if (length($msf) > 0 and length($mre) > 0) { - $msf =~ s/^\s*//; $msf =~ s/\s*$//; - $mre =~ s/^\s*//; $mre =~ s/\s*$//; - $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; - } - } - } - } - - for my $f ( keys %fields) { - if ( @{$fields{$f}{sf}} ) { - for my $from_field ($source_r->field( $f )) { - my @tos = $target_r->field( $f ); - if (!@tos) { - next if (exists($fields{$f}{match}) and !$force_add); - my @new_fields = map { $_->clone } $source_r->field( $f ); - $target_r->insert_fields_ordered( @new_fields ); - } else { - for my $to_field (@tos) { - if (exists($fields{$f}{match})) { - next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf})); - } - my @new_sf = map { ($_ => $from_field->subfield($_)) } grep { defined($from_field->subfield($_)) } @{$fields{$f}{sf}}; - $to_field->add_subfields( @new_sf ); - } - } - } - } else { - my @new_fields = map { $_->clone } $source_r->field( $f ); - $target_r->insert_fields_ordered( @new_fields ); - } - } - - $target_xml = $target_r->as_xml_record; - $target_xml =~ s/^<\?.+?\?>$//mo; - $target_xml =~ s/\n//sgo; - $target_xml =~ s/>\s+