From: dbs Date: Sun, 1 May 2011 12:20:55 +0000 (+0000) Subject: Remove hard-coded default bootstrap location from authority_control_fields.pl X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=a2552ae059c3de551da8a81a9dea47b680918938;p=evergreen%2Fpines.git Remove hard-coded default bootstrap location from authority_control_fields.pl Let autoconf do its magic and we get away from one more hard-coded default. Signed-off-by: Dan Scott git-svn-id: svn://svn.open-ils.org/ILS/trunk@20359 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- diff --git a/Open-ILS/src/support-scripts/authority_control_fields.pl b/Open-ILS/src/support-scripts/authority_control_fields.pl deleted file mode 100755 index f8864d0e2c..0000000000 --- a/Open-ILS/src/support-scripts/authority_control_fields.pl +++ /dev/null @@ -1,563 +0,0 @@ -#!/usr/bin/perl -# Copyright (C) 2010-2011 Laurentian University -# Author: Dan Scott -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# --------------------------------------------------------------- - -use strict; -use warnings; -use DBI; -use Getopt::Long; -use MARC::Record; -use MARC::File::XML (BinaryEncoding => 'UTF-8'); -use OpenSRF::System; -use OpenILS::Utils::Fieldmapper; -use OpenSRF::Utils::SettingsClient; -use OpenSRF::EX qw/:try/; -use Encode; -use Unicode::Normalize; -use OpenILS::Application::AppUtils; -use Data::Dumper; -use Pod::Usage qw/ pod2usage /; - -my ($start_id, $end_id); -my $bootstrap = '/openils/conf/opensrf_core.xml'; -my @records; - -my %options; -my $result = GetOptions( - \%options, - 'configuration=s' => \$bootstrap, - 'record=i' => \@records, - 'all', 'help', - 'start_id=i' => \$start_id, - 'end_id=i' => \$end_id, -); - -if (!$result or $options{help}) { - pod2usage(0); -} - -OpenSRF::System->bootstrap_client(config_file => $bootstrap); -Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); - -# must be loaded and initialized after the IDL is parsed -use OpenILS::Utils::CStoreEditor; -OpenILS::Utils::CStoreEditor::init(); - -my $e = OpenILS::Utils::CStoreEditor->new; -my $undeleted; -if ($options{all}) { - # get a list of all non-deleted records from Evergreen - # open-ils.cstore open-ils.cstore.direct.biblio.record_entry.id_list.atomic {"deleted":"f"} - $undeleted = $e->request( - 'open-ils.cstore.direct.biblio.record_entry.id_list.atomic', - [{deleted => 'f'}, {id => { '>' => 0}}] - ); - @records = @$undeleted; -} - -if ($start_id and $end_id) { - @records = ($start_id .. $end_id); -} -# print Dumper($undeleted, \@records); - -# Hash of controlled fields & subfields in bibliographic records, and their -# corresponding controlling fields & subfields in the authority record -# -# So, if the bib 650$a can be controlled by an auth 150$a, that maps to: -# 650 => { a => { 150 => 'a'}} -my %controllees = ( - 100 => { a => { 100 => 'a' }, - b => { 100 => 'b' }, - c => { 100 => 'c' }, - d => { 100 => 'd' }, - e => { 100 => 'e' }, - f => { 100 => 'f' }, - g => { 100 => 'g' }, - j => { 100 => 'j' }, - k => { 100 => 'k' }, - l => { 100 => 'l' }, - n => { 100 => 'n' }, - p => { 100 => 'p' }, - q => { 100 => 'q' }, - t => { 100 => 't' }, - u => { 100 => 'u' }, - 4 => { 100 => '4' }, - }, - 110 => { a => { 110 => 'a' }, - b => { 110 => 'b' }, - c => { 110 => 'c' }, - d => { 110 => 'd' }, - e => { 110 => 'e' }, - f => { 110 => 'f' }, - g => { 110 => 'g' }, - k => { 110 => 'k' }, - l => { 110 => 'l' }, - n => { 110 => 'n' }, - p => { 110 => 'p' }, - t => { 110 => 't' }, - u => { 110 => 'u' }, - 4 => { 110 => '4' }, - }, - 111 => { a => { 111 => 'a' }, - c => { 111 => 'c' }, - d => { 111 => 'd' }, - e => { 111 => 'e' }, - f => { 111 => 'f' }, - g => { 111 => 'g' }, - j => { 111 => 'j' }, - k => { 111 => 'k' }, - l => { 111 => 'l' }, - n => { 111 => 'n' }, - p => { 111 => 'p' }, - q => { 111 => 'q' }, - t => { 111 => 't' }, - u => { 111 => 'u' }, - 4 => { 111 => '4' }, - }, - 130 => { a => { 130 => 'a' }, - d => { 130 => 'd' }, - f => { 130 => 'f' }, - g => { 130 => 'g' }, - h => { 130 => 'h' }, - k => { 130 => 'k' }, - l => { 130 => 'l' }, - m => { 130 => 'm' }, - n => { 130 => 'n' }, - o => { 130 => 'o' }, - p => { 130 => 'p' }, - r => { 130 => 'r' }, - s => { 130 => 's' }, - t => { 130 => 't' }, - }, - 600 => { a => { 100 => 'a' }, - b => { 100 => 'b' }, - c => { 100 => 'c' }, - d => { 100 => 'd' }, - e => { 100 => 'e' }, - f => { 100 => 'f' }, - g => { 100 => 'g' }, - h => { 100 => 'h' }, - j => { 100 => 'j' }, - k => { 100 => 'k' }, - l => { 100 => 'l' }, - m => { 100 => 'm' }, - n => { 100 => 'n' }, - o => { 100 => 'o' }, - p => { 100 => 'p' }, - q => { 100 => 'q' }, - r => { 100 => 'r' }, - s => { 100 => 's' }, - t => { 100 => 't' }, - v => { 100 => 'v' }, - x => { 100 => 'x' }, - y => { 100 => 'y' }, - z => { 100 => 'z' }, - 4 => { 100 => '4' }, - }, - 610 => { a => { 110 => 'a' }, - b => { 110 => 'b' }, - c => { 110 => 'c' }, - d => { 110 => 'd' }, - e => { 110 => 'e' }, - f => { 110 => 'f' }, - g => { 110 => 'g' }, - h => { 110 => 'h' }, - k => { 110 => 'k' }, - l => { 110 => 'l' }, - m => { 110 => 'm' }, - n => { 110 => 'n' }, - o => { 110 => 'o' }, - p => { 110 => 'p' }, - r => { 110 => 'r' }, - s => { 110 => 's' }, - t => { 110 => 't' }, - v => { 110 => 'v' }, - x => { 110 => 'x' }, - y => { 110 => 'y' }, - z => { 110 => 'z' }, - }, - 611 => { a => { 111 => 'a' }, - c => { 111 => 'c' }, - d => { 111 => 'd' }, - e => { 111 => 'e' }, - f => { 111 => 'f' }, - g => { 111 => 'g' }, - h => { 111 => 'h' }, - j => { 111 => 'j' }, - k => { 111 => 'k' }, - l => { 111 => 'l' }, - n => { 111 => 'n' }, - p => { 111 => 'p' }, - q => { 111 => 'q' }, - s => { 111 => 's' }, - t => { 111 => 't' }, - v => { 111 => 'v' }, - x => { 111 => 'x' }, - y => { 111 => 'y' }, - z => { 111 => 'z' }, - }, - 630 => { a => { 130 => 'a' }, - d => { 130 => 'd' }, - f => { 130 => 'f' }, - g => { 130 => 'g' }, - h => { 130 => 'h' }, - k => { 130 => 'k' }, - l => { 130 => 'l' }, - m => { 130 => 'm' }, - n => { 130 => 'n' }, - o => { 130 => 'o' }, - p => { 130 => 'p' }, - r => { 130 => 'r' }, - s => { 130 => 's' }, - t => { 130 => 't' }, - v => { 130 => 'v' }, - x => { 130 => 'x' }, - y => { 130 => 'y' }, - z => { 130 => 'z' }, - }, - 648 => { a => { 148 => 'a' }, - v => { 148 => 'v' }, - x => { 148 => 'x' }, - y => { 148 => 'y' }, - z => { 148 => 'z' }, - }, - 650 => { a => { 150 => 'a' }, - b => { 150 => 'b' }, - v => { 150 => 'v' }, - x => { 150 => 'x' }, - y => { 150 => 'y' }, - z => { 150 => 'z' }, - }, - 651 => { a => { 151 => 'a' }, - v => { 151 => 'v' }, - x => { 151 => 'x' }, - y => { 151 => 'y' }, - z => { 151 => 'z' }, - }, - 655 => { a => { 155 => 'a' }, - v => { 155 => 'v' }, - x => { 155 => 'x' }, - y => { 155 => 'y' }, - z => { 155 => 'z' }, - }, - 700 => { a => { 100 => 'a' }, - b => { 100 => 'b' }, - c => { 100 => 'c' }, - d => { 100 => 'd' }, - e => { 100 => 'e' }, - f => { 100 => 'f' }, - g => { 100 => 'g' }, - j => { 100 => 'j' }, - k => { 100 => 'k' }, - l => { 100 => 'l' }, - n => { 100 => 'n' }, - p => { 100 => 'p' }, - q => { 100 => 'q' }, - t => { 100 => 't' }, - u => { 100 => 'u' }, - 4 => { 100 => '4' }, - }, - 710 => { a => { 110 => 'a' }, - b => { 110 => 'b' }, - c => { 110 => 'c' }, - d => { 110 => 'd' }, - e => { 110 => 'e' }, - f => { 110 => 'f' }, - g => { 110 => 'g' }, - k => { 110 => 'k' }, - l => { 110 => 'l' }, - n => { 110 => 'n' }, - p => { 110 => 'p' }, - t => { 110 => 't' }, - u => { 110 => 'u' }, - 4 => { 110 => '4' }, - }, - 711 => { a => { 111 => 'a' }, - c => { 111 => 'c' }, - d => { 111 => 'd' }, - e => { 111 => 'e' }, - f => { 111 => 'f' }, - g => { 111 => 'g' }, - j => { 111 => 'j' }, - k => { 111 => 'k' }, - l => { 111 => 'l' }, - n => { 111 => 'n' }, - p => { 111 => 'p' }, - q => { 111 => 'q' }, - t => { 111 => 't' }, - u => { 111 => 'u' }, - 4 => { 111 => '4' }, - }, - 730 => { a => { 130 => 'a' }, - d => { 130 => 'd' }, - f => { 130 => 'f' }, - g => { 130 => 'g' }, - h => { 130 => 'h' }, - k => { 130 => 'k' }, - l => { 130 => 'l' }, - m => { 130 => 'm' }, - n => { 130 => 'n' }, - o => { 130 => 'o' }, - p => { 130 => 'p' }, - r => { 130 => 'r' }, - s => { 130 => 's' }, - t => { 130 => 't' }, - }, - 751 => { a => { 151 => 'a' }, - v => { 151 => 'v' }, - x => { 151 => 'x' }, - y => { 151 => 'y' }, - z => { 151 => 'z' }, - }, - 830 => { a => { 830 => 'a' }, - d => { 130 => 'd' }, - f => { 130 => 'f' }, - g => { 130 => 'g' }, - h => { 130 => 'h' }, - k => { 130 => 'k' }, - l => { 130 => 'l' }, - m => { 130 => 'm' }, - n => { 130 => 'n' }, - o => { 130 => 'o' }, - p => { 130 => 'p' }, - r => { 130 => 'r' }, - s => { 130 => 's' }, - t => { 130 => 't' }, - }, -); - -foreach my $rec_id (@records) { - # print "$rec_id\n"; - - # State variable; was the record changed? - my $changed; - - # get the record - my $record = $e->retrieve_biblio_record_entry($rec_id); - next unless $record; - # print Dumper($record); - - try { - my $marc = MARC::Record->new_from_xml($record->marc()); - - # get the list of controlled fields - my @c_fields = keys %controllees; - - foreach my $c_tag (@c_fields) { - my @c_subfields = keys %{$controllees{"$c_tag"}}; - # print "Field: $field subfields: "; - # foreach (@subfields) { print "$_ "; } - - # Get the MARCXML from the record and check for controlled fields/subfields - my @bib_fields = ($marc->field($c_tag)); - foreach my $bib_field (@bib_fields) { - # print $_->as_formatted(); - my %match_subfields; - my $match_tag; - my @searches; - foreach my $c_subfield (@c_subfields) { - my $sf = $bib_field->subfield($c_subfield); - if ($sf) { - # Give me the first element of the list of authority controlling tags for this subfield - # XXX Will we need to support more than one controlling tag per subfield? Probably. That - # will suck. Oh well, leave that up to Ole to implement. - $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0]; - $match_tag = $match_subfields{$c_subfield}; - push @searches, {term => $sf, subfield => $c_subfield}; - } - } - # print Dumper(\%match_subfields); - next if !$match_tag; - - my @tags = ($match_tag); - - # print "Controlling tag: $c_tag and match tag $match_tag\n"; - # print Dumper(\@tags, \@searches); - - # Now we've built up a complete set of matching controlled - # subfields for this particular field; let's check to see if - # we have a matching authority record - my $session = OpenSRF::AppSession->create("open-ils.search"); - my $validates = $session->request("open-ils.search.authority.validate.tag.id_list", - "tags", \@tags, "searches", \@searches - )->gather(); - $session->disconnect(); - - # print Dumper($validates); - - # Protect against failed (error condition) search request - if (!$validates) { - print STDERR "Search for matching authority failed; record # $rec_id\n"; - next; - } - - if (scalar(@$validates) == 0) { - next; - } - - # Iterate through the returned authority record IDs to delete any - # matching $0 subfields already in the bib record - foreach my $auth_zero (@$validates) { - $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/); - } - - # Okay, we have a matching authority control; time to - # add the magical subfield 0. Use the first returned auth - # record as a match. - my $auth_id = @$validates[0]; - my $auth_rec = $e->retrieve_authority_record_entry($auth_id); - my $auth_marc = MARC::Record->new_from_xml($auth_rec->marc()); - my $cni = $auth_marc->field('003')->data(); - - $bib_field->add_subfields('0' => "($cni)$auth_id"); - $changed = 1; - } - } - if ($changed) { - my $editor = OpenILS::Utils::CStoreEditor->new(xact=>1); - # print $marc->as_formatted(); - my $xml = $marc->as_xml_record(); - $xml =~ s/\n//sgo; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+entityize($xml); - - $record->marc($xml); - $editor->update_biblio_record_entry($record); - $editor->commit(); - } - } otherwise { - my $err = shift; - print STDERR "\nRecord # $rec_id : $err\n"; - import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire process - } -} - -__END__ - -=head1 NAME - -authority_control_fields.pl - Controls fields in bibliographic records with authorities in Evergreen - -=head1 SYNOPSIS - -C [B<--configuration>=I] -[[B<--record>=I[ B<--record>=I]]] | [B<--all>] | [B<--start_id>=I B<--end_id>=I] - -=head1 DESCRIPTION - -For a given set of records: - -=over - -=item * Iterate through the list of fields that are controlled fields - -=item * Iterate through the list of subfields that are controlled for -that given field - -=item * Search for a matching authority record for that combination of -field + subfield(s) - -=over - -=item * If we find a match, then add a $0 subfield to that field identifying -the controlling authority record - -=item * If we do not find a match, then insert a row into an "uncontrolled" -table identifying the record ID, field, and subfield(s) that were not controlled - -=back - -=item * Iterate through the list of floating subdivisions - -=over - -=item * If we find a match, then add a $0 subfield to that field identifying -the controlling authority record - -=item * If we do not find a match, then insert a row into an "uncontrolled" -table identifying the record ID, field, and subfield(s) that were not controlled - -=back - -=item * If we changed the record, update it in the database - -=back - -=head1 OPTIONS - -=over - -=item * B<-c> I, B<--configuration>=I - -Specifies the OpenSRF configuration file used to connect to the OpenSRF router. -Defaults to F - -=item * B<-r> I, B<--record>=I - -Specifies the bibliographic record ID (found in the C -column) of the record to process. This option may be specified more than once -to process multiple records in a single run. - -=item * B<-a>, B<--all> - -Specifies that all bibliographic records should be processed. For large -databases, this may take an extraordinarily long amount of time. - -=item * B<-s> I, B<--start_id>=I - -Specifies the starting ID of the range of bibliographic records to process. -This option is ignored unless it is accompanied by the B<-e> or B<--end_id> -option. - -=item * B<-e> I, B<--end_id>=I - -Specifies the ending ID of the range of bibliographic records to process. -This option is ignored unless it is accompanied by the B<-s> or B<--start> -option. - -=back - -=head1 EXAMPLES - - authority_control_fields.pl --start_id 1 --end_id 50000 - -Processes the bibliographic records with IDs between 1 and 50,000 using the -default OpenSRF configuration file for connection information. - -=head1 AUTHOR - -Dan Scott - -=head1 COPYRIGHT AND LICENSE - -Copyright 2010-2011 by Dan Scott - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -=cut - diff --git a/Open-ILS/src/support-scripts/authority_control_fields.pl.in b/Open-ILS/src/support-scripts/authority_control_fields.pl.in new file mode 100755 index 0000000000..07c9f77f10 --- /dev/null +++ b/Open-ILS/src/support-scripts/authority_control_fields.pl.in @@ -0,0 +1,563 @@ +#!/usr/bin/perl +# Copyright (C) 2010-2011 Laurentian University +# Author: Dan Scott +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# --------------------------------------------------------------- + +use strict; +use warnings; +use DBI; +use Getopt::Long; +use MARC::Record; +use MARC::File::XML (BinaryEncoding => 'UTF-8'); +use OpenSRF::System; +use OpenILS::Utils::Fieldmapper; +use OpenSRF::Utils::SettingsClient; +use OpenSRF::EX qw/:try/; +use Encode; +use Unicode::Normalize; +use OpenILS::Application::AppUtils; +use Data::Dumper; +use Pod::Usage qw/ pod2usage /; + +my ($start_id, $end_id); +my $bootstrap = '@sysconfdir@/opensrf_core.xml'; +my @records; + +my %options; +my $result = GetOptions( + \%options, + 'configuration=s' => \$bootstrap, + 'record=i' => \@records, + 'all', 'help', + 'start_id=i' => \$start_id, + 'end_id=i' => \$end_id, +); + +if (!$result or $options{help}) { + pod2usage(0); +} + +OpenSRF::System->bootstrap_client(config_file => $bootstrap); +Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); + +# must be loaded and initialized after the IDL is parsed +use OpenILS::Utils::CStoreEditor; +OpenILS::Utils::CStoreEditor::init(); + +my $e = OpenILS::Utils::CStoreEditor->new; +my $undeleted; +if ($options{all}) { + # get a list of all non-deleted records from Evergreen + # open-ils.cstore open-ils.cstore.direct.biblio.record_entry.id_list.atomic {"deleted":"f"} + $undeleted = $e->request( + 'open-ils.cstore.direct.biblio.record_entry.id_list.atomic', + [{deleted => 'f'}, {id => { '>' => 0}}] + ); + @records = @$undeleted; +} + +if ($start_id and $end_id) { + @records = ($start_id .. $end_id); +} +# print Dumper($undeleted, \@records); + +# Hash of controlled fields & subfields in bibliographic records, and their +# corresponding controlling fields & subfields in the authority record +# +# So, if the bib 650$a can be controlled by an auth 150$a, that maps to: +# 650 => { a => { 150 => 'a'}} +my %controllees = ( + 100 => { a => { 100 => 'a' }, + b => { 100 => 'b' }, + c => { 100 => 'c' }, + d => { 100 => 'd' }, + e => { 100 => 'e' }, + f => { 100 => 'f' }, + g => { 100 => 'g' }, + j => { 100 => 'j' }, + k => { 100 => 'k' }, + l => { 100 => 'l' }, + n => { 100 => 'n' }, + p => { 100 => 'p' }, + q => { 100 => 'q' }, + t => { 100 => 't' }, + u => { 100 => 'u' }, + 4 => { 100 => '4' }, + }, + 110 => { a => { 110 => 'a' }, + b => { 110 => 'b' }, + c => { 110 => 'c' }, + d => { 110 => 'd' }, + e => { 110 => 'e' }, + f => { 110 => 'f' }, + g => { 110 => 'g' }, + k => { 110 => 'k' }, + l => { 110 => 'l' }, + n => { 110 => 'n' }, + p => { 110 => 'p' }, + t => { 110 => 't' }, + u => { 110 => 'u' }, + 4 => { 110 => '4' }, + }, + 111 => { a => { 111 => 'a' }, + c => { 111 => 'c' }, + d => { 111 => 'd' }, + e => { 111 => 'e' }, + f => { 111 => 'f' }, + g => { 111 => 'g' }, + j => { 111 => 'j' }, + k => { 111 => 'k' }, + l => { 111 => 'l' }, + n => { 111 => 'n' }, + p => { 111 => 'p' }, + q => { 111 => 'q' }, + t => { 111 => 't' }, + u => { 111 => 'u' }, + 4 => { 111 => '4' }, + }, + 130 => { a => { 130 => 'a' }, + d => { 130 => 'd' }, + f => { 130 => 'f' }, + g => { 130 => 'g' }, + h => { 130 => 'h' }, + k => { 130 => 'k' }, + l => { 130 => 'l' }, + m => { 130 => 'm' }, + n => { 130 => 'n' }, + o => { 130 => 'o' }, + p => { 130 => 'p' }, + r => { 130 => 'r' }, + s => { 130 => 's' }, + t => { 130 => 't' }, + }, + 600 => { a => { 100 => 'a' }, + b => { 100 => 'b' }, + c => { 100 => 'c' }, + d => { 100 => 'd' }, + e => { 100 => 'e' }, + f => { 100 => 'f' }, + g => { 100 => 'g' }, + h => { 100 => 'h' }, + j => { 100 => 'j' }, + k => { 100 => 'k' }, + l => { 100 => 'l' }, + m => { 100 => 'm' }, + n => { 100 => 'n' }, + o => { 100 => 'o' }, + p => { 100 => 'p' }, + q => { 100 => 'q' }, + r => { 100 => 'r' }, + s => { 100 => 's' }, + t => { 100 => 't' }, + v => { 100 => 'v' }, + x => { 100 => 'x' }, + y => { 100 => 'y' }, + z => { 100 => 'z' }, + 4 => { 100 => '4' }, + }, + 610 => { a => { 110 => 'a' }, + b => { 110 => 'b' }, + c => { 110 => 'c' }, + d => { 110 => 'd' }, + e => { 110 => 'e' }, + f => { 110 => 'f' }, + g => { 110 => 'g' }, + h => { 110 => 'h' }, + k => { 110 => 'k' }, + l => { 110 => 'l' }, + m => { 110 => 'm' }, + n => { 110 => 'n' }, + o => { 110 => 'o' }, + p => { 110 => 'p' }, + r => { 110 => 'r' }, + s => { 110 => 's' }, + t => { 110 => 't' }, + v => { 110 => 'v' }, + x => { 110 => 'x' }, + y => { 110 => 'y' }, + z => { 110 => 'z' }, + }, + 611 => { a => { 111 => 'a' }, + c => { 111 => 'c' }, + d => { 111 => 'd' }, + e => { 111 => 'e' }, + f => { 111 => 'f' }, + g => { 111 => 'g' }, + h => { 111 => 'h' }, + j => { 111 => 'j' }, + k => { 111 => 'k' }, + l => { 111 => 'l' }, + n => { 111 => 'n' }, + p => { 111 => 'p' }, + q => { 111 => 'q' }, + s => { 111 => 's' }, + t => { 111 => 't' }, + v => { 111 => 'v' }, + x => { 111 => 'x' }, + y => { 111 => 'y' }, + z => { 111 => 'z' }, + }, + 630 => { a => { 130 => 'a' }, + d => { 130 => 'd' }, + f => { 130 => 'f' }, + g => { 130 => 'g' }, + h => { 130 => 'h' }, + k => { 130 => 'k' }, + l => { 130 => 'l' }, + m => { 130 => 'm' }, + n => { 130 => 'n' }, + o => { 130 => 'o' }, + p => { 130 => 'p' }, + r => { 130 => 'r' }, + s => { 130 => 's' }, + t => { 130 => 't' }, + v => { 130 => 'v' }, + x => { 130 => 'x' }, + y => { 130 => 'y' }, + z => { 130 => 'z' }, + }, + 648 => { a => { 148 => 'a' }, + v => { 148 => 'v' }, + x => { 148 => 'x' }, + y => { 148 => 'y' }, + z => { 148 => 'z' }, + }, + 650 => { a => { 150 => 'a' }, + b => { 150 => 'b' }, + v => { 150 => 'v' }, + x => { 150 => 'x' }, + y => { 150 => 'y' }, + z => { 150 => 'z' }, + }, + 651 => { a => { 151 => 'a' }, + v => { 151 => 'v' }, + x => { 151 => 'x' }, + y => { 151 => 'y' }, + z => { 151 => 'z' }, + }, + 655 => { a => { 155 => 'a' }, + v => { 155 => 'v' }, + x => { 155 => 'x' }, + y => { 155 => 'y' }, + z => { 155 => 'z' }, + }, + 700 => { a => { 100 => 'a' }, + b => { 100 => 'b' }, + c => { 100 => 'c' }, + d => { 100 => 'd' }, + e => { 100 => 'e' }, + f => { 100 => 'f' }, + g => { 100 => 'g' }, + j => { 100 => 'j' }, + k => { 100 => 'k' }, + l => { 100 => 'l' }, + n => { 100 => 'n' }, + p => { 100 => 'p' }, + q => { 100 => 'q' }, + t => { 100 => 't' }, + u => { 100 => 'u' }, + 4 => { 100 => '4' }, + }, + 710 => { a => { 110 => 'a' }, + b => { 110 => 'b' }, + c => { 110 => 'c' }, + d => { 110 => 'd' }, + e => { 110 => 'e' }, + f => { 110 => 'f' }, + g => { 110 => 'g' }, + k => { 110 => 'k' }, + l => { 110 => 'l' }, + n => { 110 => 'n' }, + p => { 110 => 'p' }, + t => { 110 => 't' }, + u => { 110 => 'u' }, + 4 => { 110 => '4' }, + }, + 711 => { a => { 111 => 'a' }, + c => { 111 => 'c' }, + d => { 111 => 'd' }, + e => { 111 => 'e' }, + f => { 111 => 'f' }, + g => { 111 => 'g' }, + j => { 111 => 'j' }, + k => { 111 => 'k' }, + l => { 111 => 'l' }, + n => { 111 => 'n' }, + p => { 111 => 'p' }, + q => { 111 => 'q' }, + t => { 111 => 't' }, + u => { 111 => 'u' }, + 4 => { 111 => '4' }, + }, + 730 => { a => { 130 => 'a' }, + d => { 130 => 'd' }, + f => { 130 => 'f' }, + g => { 130 => 'g' }, + h => { 130 => 'h' }, + k => { 130 => 'k' }, + l => { 130 => 'l' }, + m => { 130 => 'm' }, + n => { 130 => 'n' }, + o => { 130 => 'o' }, + p => { 130 => 'p' }, + r => { 130 => 'r' }, + s => { 130 => 's' }, + t => { 130 => 't' }, + }, + 751 => { a => { 151 => 'a' }, + v => { 151 => 'v' }, + x => { 151 => 'x' }, + y => { 151 => 'y' }, + z => { 151 => 'z' }, + }, + 830 => { a => { 830 => 'a' }, + d => { 130 => 'd' }, + f => { 130 => 'f' }, + g => { 130 => 'g' }, + h => { 130 => 'h' }, + k => { 130 => 'k' }, + l => { 130 => 'l' }, + m => { 130 => 'm' }, + n => { 130 => 'n' }, + o => { 130 => 'o' }, + p => { 130 => 'p' }, + r => { 130 => 'r' }, + s => { 130 => 's' }, + t => { 130 => 't' }, + }, +); + +foreach my $rec_id (@records) { + # print "$rec_id\n"; + + # State variable; was the record changed? + my $changed; + + # get the record + my $record = $e->retrieve_biblio_record_entry($rec_id); + next unless $record; + # print Dumper($record); + + try { + my $marc = MARC::Record->new_from_xml($record->marc()); + + # get the list of controlled fields + my @c_fields = keys %controllees; + + foreach my $c_tag (@c_fields) { + my @c_subfields = keys %{$controllees{"$c_tag"}}; + # print "Field: $field subfields: "; + # foreach (@subfields) { print "$_ "; } + + # Get the MARCXML from the record and check for controlled fields/subfields + my @bib_fields = ($marc->field($c_tag)); + foreach my $bib_field (@bib_fields) { + # print $_->as_formatted(); + my %match_subfields; + my $match_tag; + my @searches; + foreach my $c_subfield (@c_subfields) { + my $sf = $bib_field->subfield($c_subfield); + if ($sf) { + # Give me the first element of the list of authority controlling tags for this subfield + # XXX Will we need to support more than one controlling tag per subfield? Probably. That + # will suck. Oh well, leave that up to Ole to implement. + $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0]; + $match_tag = $match_subfields{$c_subfield}; + push @searches, {term => $sf, subfield => $c_subfield}; + } + } + # print Dumper(\%match_subfields); + next if !$match_tag; + + my @tags = ($match_tag); + + # print "Controlling tag: $c_tag and match tag $match_tag\n"; + # print Dumper(\@tags, \@searches); + + # Now we've built up a complete set of matching controlled + # subfields for this particular field; let's check to see if + # we have a matching authority record + my $session = OpenSRF::AppSession->create("open-ils.search"); + my $validates = $session->request("open-ils.search.authority.validate.tag.id_list", + "tags", \@tags, "searches", \@searches + )->gather(); + $session->disconnect(); + + # print Dumper($validates); + + # Protect against failed (error condition) search request + if (!$validates) { + print STDERR "Search for matching authority failed; record # $rec_id\n"; + next; + } + + if (scalar(@$validates) == 0) { + next; + } + + # Iterate through the returned authority record IDs to delete any + # matching $0 subfields already in the bib record + foreach my $auth_zero (@$validates) { + $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/); + } + + # Okay, we have a matching authority control; time to + # add the magical subfield 0. Use the first returned auth + # record as a match. + my $auth_id = @$validates[0]; + my $auth_rec = $e->retrieve_authority_record_entry($auth_id); + my $auth_marc = MARC::Record->new_from_xml($auth_rec->marc()); + my $cni = $auth_marc->field('003')->data(); + + $bib_field->add_subfields('0' => "($cni)$auth_id"); + $changed = 1; + } + } + if ($changed) { + my $editor = OpenILS::Utils::CStoreEditor->new(xact=>1); + # print $marc->as_formatted(); + my $xml = $marc->as_xml_record(); + $xml =~ s/\n//sgo; + $xml =~ s/^<\?xml.+\?\s*>//go; + $xml =~ s/>\s+entityize($xml); + + $record->marc($xml); + $editor->update_biblio_record_entry($record); + $editor->commit(); + } + } otherwise { + my $err = shift; + print STDERR "\nRecord # $rec_id : $err\n"; + import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire process + } +} + +__END__ + +=head1 NAME + +authority_control_fields.pl - Controls fields in bibliographic records with authorities in Evergreen + +=head1 SYNOPSIS + +C [B<--configuration>=I] +[[B<--record>=I[ B<--record>=I]]] | [B<--all>] | [B<--start_id>=I B<--end_id>=I] + +=head1 DESCRIPTION + +For a given set of records: + +=over + +=item * Iterate through the list of fields that are controlled fields + +=item * Iterate through the list of subfields that are controlled for +that given field + +=item * Search for a matching authority record for that combination of +field + subfield(s) + +=over + +=item * If we find a match, then add a $0 subfield to that field identifying +the controlling authority record + +=item * If we do not find a match, then insert a row into an "uncontrolled" +table identifying the record ID, field, and subfield(s) that were not controlled + +=back + +=item * Iterate through the list of floating subdivisions + +=over + +=item * If we find a match, then add a $0 subfield to that field identifying +the controlling authority record + +=item * If we do not find a match, then insert a row into an "uncontrolled" +table identifying the record ID, field, and subfield(s) that were not controlled + +=back + +=item * If we changed the record, update it in the database + +=back + +=head1 OPTIONS + +=over + +=item * B<-c> I, B<--configuration>=I + +Specifies the OpenSRF configuration file used to connect to the OpenSRF router. +Defaults to F<@sysconfdir@/opensrf_core.xml> + +=item * B<-r> I, B<--record>=I + +Specifies the bibliographic record ID (found in the C +column) of the record to process. This option may be specified more than once +to process multiple records in a single run. + +=item * B<-a>, B<--all> + +Specifies that all bibliographic records should be processed. For large +databases, this may take an extraordinarily long amount of time. + +=item * B<-s> I, B<--start_id>=I + +Specifies the starting ID of the range of bibliographic records to process. +This option is ignored unless it is accompanied by the B<-e> or B<--end_id> +option. + +=item * B<-e> I, B<--end_id>=I + +Specifies the ending ID of the range of bibliographic records to process. +This option is ignored unless it is accompanied by the B<-s> or B<--start> +option. + +=back + +=head1 EXAMPLES + + authority_control_fields.pl --start_id 1 --end_id 50000 + +Processes the bibliographic records with IDs between 1 and 50,000 using the +default OpenSRF configuration file for connection information. + +=head1 AUTHOR + +Dan Scott + +=head1 COPYRIGHT AND LICENSE + +Copyright 2010-2011 by Dan Scott + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +=cut + diff --git a/configure.ac b/configure.ac index 81312585fb..dd0b098b5f 100644 --- a/configure.ac +++ b/configure.ac @@ -378,6 +378,7 @@ AC_CONFIG_FILES([Makefile Open-ILS/src/extras/import/marc2bre.pl Open-ILS/src/extras/import/marc2sre.pl Open-ILS/src/extras/import/parallel_pg_loader.pl + Open-ILS/src/support-scripts/authority_control_fields.pl Open-ILS/src/support-scripts/marc_export Open-ILS/src/perlmods/Makefile Open-ILS/src/perlmods/lib/OpenILS/Utils/Cronscript.pm], @@ -388,6 +389,7 @@ AC_CONFIG_FILES([Makefile if test -e "./Open-ILS/src/extras/import/marc2bre.pl"; then chmod 755 Open-ILS/src/extras/import/marc2bre.pl; fi; if test -e "./Open-ILS/src/extras/import/marc2sre.pl"; then chmod 755 Open-ILS/src/extras/import/marc2sre.pl; fi; if test -e "./Open-ILS/src/extras/import/parallel_pg_loader.pl"; then chmod 755 Open-ILS/src/extras/import/parallel_pg_loader.pl; fi; + if test -e "./Open-ILS/src/support-scripts/authority_control_fields.pl"; then chmod 755 Open-ILS/src/support-scripts/authority_control_fields.pl; fi; if test -e "./Open-ILS/src/support-scripts/marc_export"; then chmod 755 Open-ILS/src/support-scripts/marc_export; fi; ]) AC_OUTPUT