From 6343d40803699e3d287ba8adec979dd99c4687ef Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Mon, 11 Jul 2011 11:41:35 -0400 Subject: [PATCH] Unstamped upgrade script for Authority Control Sets ... with additional fixes from Lebbeous Fogle-Weekley Signed-off-by: Mike Rylander Signed-off-by: Lebbeous Fogle-Weekley --- Open-ILS/src/sql/Pg/011.schema.authority.sql | 4 +- .../upgrade/XXXX.schema.authority-control-sets.sql | 501 +++++++++++++++++++++ 2 files changed, 503 insertions(+), 2 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-control-sets.sql diff --git a/Open-ILS/src/sql/Pg/011.schema.authority.sql b/Open-ILS/src/sql/Pg/011.schema.authority.sql index 7c44d7eab7..630fece552 100644 --- a/Open-ILS/src/sql/Pg/011.schema.authority.sql +++ b/Open-ILS/src/sql/Pg/011.schema.authority.sql @@ -218,14 +218,14 @@ CREATE OR REPLACE VIEW authority.tracing_links AS main.id AS main_id, main.tag AS main_tag, oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value, - authority.normalize_heading(are.marc) AS normalized_main_value, substr(link.value,1,1) AS relationship, substr(link.value,2,1) AS use_restriction, substr(link.value,3,1) AS deprecation, substr(link.value,4,1) AS display_restriction, link.id AS link_id, link.tag AS link_tag, - oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value + oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value, + authority.normalize_heading(are.marc) AS normalized_main_value, FROM authority.full_rec main JOIN authority.record_entry are ON (main.record = are.id) JOIN authority.control_set_authority_field main_entry diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-control-sets.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-control-sets.sql new file mode 100644 index 0000000000..97033018d7 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.authority-control-sets.sql @@ -0,0 +1,501 @@ +-- Evergreen DB patch XXXX.schema.authority-control-sets.sql +-- +-- Schema upgrade to add Authority Control Set functionality +-- +BEGIN; + + +-- check whether patch can be applied +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE TABLE authority.control_set ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL UNIQUE, -- i18n + description TEXT -- i18n +); + +CREATE TABLE authority.control_set_authority_field ( + id SERIAL PRIMARY KEY, + main_entry INT REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + tag CHAR(3) NOT NULL, + sf_list TEXT NOT NULL, + name TEXT NOT NULL, -- i18n + description TEXT -- i18n +); + +CREATE TABLE authority.control_set_bib_field ( + id SERIAL PRIMARY KEY, + authority_field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + tag CHAR(3) NOT NULL +); + +CREATE TABLE authority.thesaurus ( + code TEXT PRIMARY KEY, -- MARC21 thesaurus code + control_set INT NOT NULL REFERENCES authority.control_set (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + name TEXT NOT NULL UNIQUE, -- i18n + description TEXT -- i18n +); + +CREATE TABLE authority.browse_axis ( + code TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL, -- i18n + sorter TEXT REFERENCES config.record_attr_definition (name) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + description TEXT +); + +CREATE TABLE authority.browse_axis_authority_field_map ( + id SERIAL PRIMARY KEY, + axis TEXT NOT NULL REFERENCES authority.browse_axis (code) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED, + field INT NOT NULL REFERENCES authority.control_set_authority_field (id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED +); + +ALTER TABLE authority.record_entry ADD COLUMN control_set INT REFERENCES authority.control_set (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE authority.rec_descriptor DROP COLUMN char_encoding, ADD COLUMN encoding_level TEXT, ADD COLUMN thesaurus TEXT; + +CREATE INDEX authority_full_rec_value_index ON authority.full_rec (value); +CREATE OR REPLACE RULE protect_authority_rec_delete AS ON DELETE TO authority.record_entry DO INSTEAD (UPDATE authority.record_entry SET deleted = TRUE WHERE OLD.id = authority.record_entry.id; DELETE FROM authority.full_rec WHERE record = OLD.id); + +CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT, no_thesaurus BOOL ) RETURNS TEXT AS $func$ +DECLARE + acsaf authority.control_set_authority_field%ROWTYPE; + tag_used TEXT; + sf TEXT; + thes_code TEXT; + cset INT; + heading_text TEXT; + tmp_text TEXT; +BEGIN + thes_code := vandelay.marc21_extract_fixed_field(marcxml,'Subj'); + IF thes_code IS NULL THEN + thes_code := '|'; + END IF; + + SELECT control_set INTO cset FROM authority.thesaurus WHERE code = thes_code; + IF NOT FOUND THEN + cset = 1; + END IF; + + heading_text := ''; + FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset AND main_entry IS NULL LOOP + tag_used := acsaf.tag; + FOR sf IN SELECT * FROM regexp_split_to_table(acsaf.sf_list,'') LOOP + tmp_text := oils_xpath_string('//*[@tag="'||tag_used||'"]/*[@code="'||sf||'"]', marcxml); + IF tmp_text IS NOT NULL AND tmp_text <> '' THEN + heading_text := heading_text || E'\u2021' || sf || ' ' || tmp_text; + END IF; + END LOOP; + EXIT WHEN heading_text <> ''; + END LOOP; + + IF thes_code = 'z' THEN + thes_code := oils_xpath_string('//*[@tag="040"]/*[@code="f"][1]', marcxml); + END IF; + + IF heading_text <> '' THEN + IF no_thesaurus IS TRUE THEN + heading_text := tag_used || ' ' || public.naco_normalize(heading_text); + ELSE + heading_text := tag_used || '_' || thes_code || ' ' || public.naco_normalize(heading_text); + END IF; + ELSE + heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml); + END IF; + + RETURN heading_text; +END; +$func$ LANGUAGE PLPGSQL IMMUTABLE; + +CREATE OR REPLACE FUNCTION authority.simple_normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$ + SELECT authority.normalize_heading($1, TRUE); +$func$ LANGUAGE SQL IMMUTABLE; + +CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT ) RETURNS TEXT AS $func$ + SELECT authority.normalize_heading($1, FALSE); +$func$ LANGUAGE SQL IMMUTABLE; + +CREATE OR REPLACE VIEW authority.tracing_links AS + SELECT main.record AS record, + main.id AS main_id, + main.tag AS main_tag, + oils_xpath_string('//*[@tag="'||main.tag||'"]/*[local-name()="subfield"]', are.marc) AS main_value, + substr(link.value,1,1) AS relationship, + substr(link.value,2,1) AS use_restriction, + substr(link.value,3,1) AS deprecation, + substr(link.value,4,1) AS display_restriction, + link.id AS link_id, + link.tag AS link_tag, + oils_xpath_string('//*[@tag="'||link.tag||'"]/*[local-name()="subfield"]', are.marc) AS link_value, + authority.normalize_heading(are.marc) AS normalized_main_value + FROM authority.full_rec main + JOIN authority.record_entry are ON (main.record = are.id) + JOIN authority.control_set_authority_field main_entry + ON (main_entry.tag = main.tag + AND main_entry.main_entry IS NULL + AND main.subfield = 'a' ) + JOIN authority.control_set_authority_field sub_entry + ON (main_entry.id = sub_entry.main_entry) + JOIN authority.full_rec link + ON (link.record = main.record + AND link.tag = sub_entry.tag + AND link.subfield = 'w' ); + +CREATE OR REPLACE FUNCTION authority.generate_overlay_template (source_xml TEXT) RETURNS TEXT AS $f$ +DECLARE + cset INT; + main_entry authority.control_set_authority_field%ROWTYPE; + bib_field authority.control_set_bib_field%ROWTYPE; + auth_id INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', source_xml)::INT; + replace_data XML[] DEFAULT '{}'::XML[]; + replace_rules TEXT[] DEFAULT '{}'::TEXT[]; + auth_field XML[]; +BEGIN + IF auth_id IS NULL THEN + RETURN NULL; + END IF; + + -- Default to the LoC controll set + SELECT COALESCE(control_set,1) INTO cset FROM authority.record_entry WHERE id = auth_id; + + FOR main_entry IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP + auth_field := XPATH('//*[@tag="'||main_entry.tag||'"][1]',source_xml::XML); + IF ARRAY_LENGTH(auth_field,1) > 0 THEN + FOR bib_field IN SELECT * FROM authority.control_set_bib_field WHERE authority_field = main_entry.id LOOP + replace_data := replace_data || XMLELEMENT( name datafield, XMLATTRIBUTES(bib_field.tag AS tag), XPATH('//*[local-name()="subfield"]',auth_field[1])::XML[]); + replace_rules := replace_rules || ( bib_field.tag || main_entry.sf_list || E'[0~\\)' || auth_id || '$]' ); + END LOOP; + EXIT; + END IF; + END LOOP; + + RETURN XMLELEMENT( + name record, + XMLATTRIBUTES('http://www.loc.gov/MARC21/slim' AS xmlns), + XMLELEMENT( name leader, '00881nam a2200193 4500'), + replace_data, + XMLELEMENT( + name datafield, + XMLATTRIBUTES( '905' AS tag, ' ' AS ind1, ' ' AS ind2), + XMLELEMENT( + name subfield, + XMLATTRIBUTES('r' AS code), + ARRAY_TO_STRING(replace_rules,',') + ) + ) + )::TEXT; +END; +$f$ STABLE LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION authority.generate_overlay_template ( BIGINT ) RETURNS TEXT AS $func$ + SELECT authority.generate_overlay_template( marc ) FROM authority.record_entry WHERE id = $1; +$func$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION vandelay.add_field ( target_xml TEXT, source_xml TEXT, field TEXT, force_add INT ) RETURNS TEXT AS $_$ + + use MARC::Record; + use MARC::File::XML (BinaryEncoding => 'UTF-8'); + use MARC::Charset; + use strict; + + MARC::Charset->assume_unicode(1); + + my $target_xml = shift; + my $source_xml = shift; + my $field_spec = shift; + my $force_add = shift || 0; + + my $target_r = MARC::Record->new_from_xml( $target_xml ); + my $source_r = MARC::Record->new_from_xml( $source_xml ); + + return $target_xml unless ($target_r && $source_r); + + my @field_list = split(',', $field_spec); + + my %fields; + for my $f (@field_list) { + $f =~ s/^\s*//; $f =~ s/\s*$//; + if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { + my $field = $1; + $field =~ s/\s+//; + my $sf = $2; + $sf =~ s/\s+//; + my $match = $3; + $match =~ s/^\s*//; $match =~ s/\s*$//; + $fields{$field} = { sf => [ split('', $sf) ] }; + if ($match) { + my ($msf,$mre) = split('~', $match); + if (length($msf) > 0 and length($mre) > 0) { + $msf =~ s/^\s*//; $msf =~ s/\s*$//; + $mre =~ s/^\s*//; $mre =~ s/\s*$//; + $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; + } + } + } + } + + for my $f ( keys %fields) { + if ( @{$fields{$f}{sf}} ) { + for my $from_field ($source_r->field( $f )) { + my @tos = $target_r->field( $f ); + if (!@tos) { + next if (exists($fields{$f}{match}) and !$force_add); + my @new_fields = map { $_->clone } $source_r->field( $f ); + $target_r->insert_fields_ordered( @new_fields ); + } else { + for my $to_field (@tos) { + if (exists($fields{$f}{match})) { + next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf})); + } + my @new_sf = map { ($_ => $from_field->subfield($_)) } grep { defined($from_field->subfield($_)) } @{$fields{$f}{sf}}; + $to_field->add_subfields( @new_sf ); + } + } + } + } else { + my @new_fields = map { $_->clone } $source_r->field( $f ); + $target_r->insert_fields_ordered( @new_fields ); + } + } + + $target_xml = $target_r->as_xml_record; + $target_xml =~ s/^<\?.+?\?>$//mo; + $target_xml =~ s/\n//sgo; + $target_xml =~ s/>\s+