From ade1aece4d28feb90abae179ff02f52d7690c5f0 Mon Sep 17 00:00:00 2001 From: dbs Date: Tue, 13 Jul 2010 15:56:33 +0000 Subject: [PATCH] Stub script for adding authority control to an existing set of bibs The goal of this script is to iterate over every bib record listed at the command line (or --all) and check every controlled bib field for matching subfield combinations in authority records. This is going to be slow, friends, but for this iteration we're looking at paying a one-time cost; after that, controlled fields will automatically be maintained by triggers on the authority.record_entry tables. The bib field-to-auth field mapping needs to be fleshed out (should be comparable to the mapping in marcedit.js) and floating subdivisions still need to be controlled. Probably in a separate pass over the bibs, given the current "design". We extend open-ils.s*.authority.validate.tag to offer an id_list() variant so that we can get the corresponding authority record ID back from the call, instead of just a count. git-svn-id: svn://svn.open-ils.org/ILS/trunk@16918 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- .../OpenILS/Application/Search/Authority.pm | 14 ++ .../Application/Storage/Publisher/authority.pm | 23 ++- .../support-scripts/authority_control_fields.pl | 198 +++++++++++++++++++++ 3 files changed, 232 insertions(+), 3 deletions(-) create mode 100644 Open-ILS/src/support-scripts/authority_control_fields.pl diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Search/Authority.pm b/Open-ILS/src/perlmods/OpenILS/Application/Search/Authority.pm index 0edce5fa46..37bc763109 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Search/Authority.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Search/Authority.pm @@ -32,6 +32,20 @@ __PACKAGE__->register_method( note => "Validates authority data from existing controlled terms", ); +sub validate_authority_return_records_by_id { + my $self = shift; + my $client = shift; + + my $session = OpenSRF::AppSession->create("open-ils.storage"); + return $session->request( 'open-ils.storage.authority.validate.tag.id_list' => @_ )->gather(1); +} +__PACKAGE__->register_method( + method => "validate_authority_return_records_by_id", + api_name => "open-ils.search.authority.validate.tag.id_list", + argc => 4, + note => "Validates authority data from existing controlled terms", +); + sub search_authority { my $self = shift; my $client = shift; diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/authority.pm b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/authority.pm index bdfe1eef8c..eab9c51f07 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/authority.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Publisher/authority.pm @@ -48,13 +48,23 @@ sub validate_tag { "WHERE tag = ? AND subfield = ? AND value = ?"; } - my $sql = 'SELECT COUNT(DISTINCT record) FROM ('; + my $sql; + if ($self->api_name =~ /id_list/) { + $sql = 'SELECT DISTINCT record FROM ('; + } else { + $sql = 'SELECT COUNT(DISTINCT record) FROM ('; + } $sql .= 'SELECT record FROM (('.join(') INTERSECT (', @selects).')) AS x '; $sql .= "JOIN $search_table recheck USING (record) WHERE recheck.tag = ? "; $sql .= "GROUP BY 1 HAVING (COUNT(recheck.id) - ?) = 0) AS foo;"; - my $count = authority::full_rec->db_Main->selectcol_arrayref( $sql, {}, @values, $t, scalar(@searches) )->[0]; - return $count if ($count > 0); + if ($self->api_name =~ /id_list/) { + my $id_list = authority::full_rec->db_Main->selectcol_arrayref( $sql, {}, @values, $t, scalar(@searches) ); + return $id_list if (scalar(@$id_list)> 0); + } else { + my $count = authority::full_rec->db_Main->selectcol_arrayref( $sql, {}, @values, $t, scalar(@searches) )->[0]; + return $count if ($count > 0); + } } return 0; @@ -65,6 +75,13 @@ __PACKAGE__->register_method( api_level => 1, ); +__PACKAGE__->register_method( + api_name => "open-ils.storage.authority.validate.tag.id_list", + method => 'validate_tag', + api_level => 1, +); + + sub find_authority_marc { my $self = shift; my $client = shift; diff --git a/Open-ILS/src/support-scripts/authority_control_fields.pl b/Open-ILS/src/support-scripts/authority_control_fields.pl new file mode 100644 index 0000000000..f6b2c94190 --- /dev/null +++ b/Open-ILS/src/support-scripts/authority_control_fields.pl @@ -0,0 +1,198 @@ +#!/usr/bin/perl +# Copyright (C) 2010 Laurentian University +# Author: Dan Scott +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# --------------------------------------------------------------- + +use strict; +use warnings; +use DBI; +use Getopt::Long; +use MARC::Record; +use MARC::File::XML; +use OpenSRF::System; +use OpenILS::Utils::Fieldmapper; +use OpenSRF::Utils::SettingsClient; +use Encode; +use Unicode::Normalize; +use OpenILS::Application::AppUtils; +# use Data::Dumper; + +=head1 + +For a given set of records (specified by ID at the command line, or special option --all): + +=over + +=item * Iterate through the list of fields that are controlled fields + +=item * Iterate through the list of subfields that are controlled for +that given field + +=item * Search for a matching authority record for that combination of +field + subfield(s) + +=over + +=item * If we find a match, then add a $0 subfield to that field identifying +the controlling authority record + +=item * If we do not find a match, then insert a row into an "uncontrolled" +table identifying the record ID, field, and subfield(s) that were not controlled + +=back + +=item * Iterate through the list of floating subdivisions + +=over + +=item * If we find a match, then add a $0 subfield to that field identifying +the controlling authority record + +=item * If we do not find a match, then insert a row into an "uncontrolled" +table identifying the record ID, field, and subfield(s) that were not controlled + +=back + +=item * If we changed the record, update it in the database + +=back + +=cut + +my $all_records; +my $bootstrap = '/openils/conf/opensrf_core.xml'; +my @records; +my $result = GetOptions( + 'configuration=s' => \$bootstrap, + 'record=s' => \@records, + 'all' => \$all_records +); + +OpenSRF::System->bootstrap_client(config_file => $bootstrap); +Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); + +# must be loaded and initialized after the IDL is parsed +use OpenILS::Utils::CStoreEditor; +OpenILS::Utils::CStoreEditor::init(); + +my $editor = OpenILS::Utils::CStoreEditor->new; +if ($all_records) { + # get a list of all non-deleted records from Evergreen + # open-ils.cstore open-ils.cstore.direct.biblio.record_entry.id_list.atomic {"deleted":"f"} + push @records, $editor->request( + 'open-ils.cstore.direct.biblio.record_entry.id_list.atomic', + {deleted => 'f'}, + {id => { '>' => 0}} + ); +} +# print Dumper(\@records); + +# Hash of controlled fields & subfields in bibliographic records, and their +# corresponding controlling fields & subfields in the authority record +# +# So, if the bib 650$a can be controlled by an auth 150$a, that maps to: +# 650 => { a => { 150 => 'a'}} +my %controllees = ( + 110 => { a => { 110 => 'a' }, + d => { 110 => 'd' }, + e => { 110 => 'e' } + }, + 711 => { a => { 111 => 'a' }, + c => { 111 => 'c' }, + d => { 111 => 'd' } + } + +); + +foreach my $rec_id (@records) { + + my $e = OpenILS::Utils::CStoreEditor->new(xact=>1); + # State variable; was the record changed? + my $changed; + + # get the record + my $record = $e->retrieve_biblio_record_entry($rec_id); + next unless $record; + # print Dumper($record); + + my $marc = MARC::Record->new_from_xml($record->marc()); + + # get the list of controlled fields + my @c_fields = keys %controllees; + + foreach my $c_tag (@c_fields) { + my @c_subfields = keys %{$controllees{"$c_tag"}}; + # print "Field: $field subfields: "; + # foreach (@subfields) { print "$_ "; } + + # Get the MARCXML from the record and check for controlled fields/subfields + my @bib_fields = ($marc->field($c_tag)); + foreach my $bib_field (@bib_fields) { + # print $_->as_formatted(); + my %match_subfields; + my $match_tag; + my @searches; + foreach my $c_subfield (@c_subfields) { + my $sf = $bib_field->subfield($c_subfield); + if ($sf) { + # Give me the first element of the list of authority controlling tags for this subfield + # XXX Will we need to support more than one controlling tag per subfield? Probably. That + # will suck. Oh well, leave that up to Ole to implement. + $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0]; + $match_tag = $match_subfields{$c_subfield}; + push @searches, {term => $sf, subfield => $c_subfield}; + } + } + # print Dumper(\%match_subfields); + + my @tags = ($match_tag); + # Now we've built up a complete set of matching controlled + # subfields for this particular field; let's check to see if + # we have a matching authority record + my $session = OpenSRF::AppSession->create("open-ils.search"); + my $validates = $session->request("open-ils.search.authority.validate.tag.id_list", + "tags", \@tags, "searches", \@searches + )->gather(); + $session->disconnect(); + + # print Dumper($validates); + + if (scalar(@$validates) == 0) { + next; + } + + # Okay, we have a matching authority control; time to + # add the magical subfield 0 + my $auth_id = @$validates[0]; + my $auth_rec = $e->retrieve_authority_record_entry($auth_id); + my $auth_marc = MARC::Record->new_from_xml($auth_rec->marc()); + my $cni = $auth_marc->field('003')->data(); + + $bib_field->add_subfields('0' => "($cni)$auth_id"); + $changed = 1; + } + } + if ($changed) { + # print $marc->as_formatted(); + my $xml = $marc->as_xml_record(); + $xml =~ s/\n//sgo; + $xml =~ s/^<\?xml.+\?\s*>//go; + $xml =~ s/>\s+entityize($xml); + + $record->marc($xml); + $e->update_biblio_record_entry($record); + } + $e->commit(); +} -- 2.11.0