use OpenILS::Application::AppUtils;
use Data::Dumper;
use Pod::Usage qw/ pod2usage /;
+use DateTime;
+$Data::Dumper::Indent = 0;
MARC::Charset->assume_unicode(1);
my ($start_id, $end_id, $refresh);
my $input_file ='';
my $bootstrap = '/openils/conf/opensrf_core.xml';
my @records;
+my $verbose = 0;
my %options;
my $result = GetOptions(
'end_id=i' => \$end_id,
'days_back=i' => \$days_back,
'file=s' => \$input_file,
+ 'verbose' => \$verbose
);
+# Print a timestamped progress message to STDOUT.
+# Does nothing unless the --verbose flag was given.
+#   $msg - text to print; a newline is appended.
+# NOTE(review): DateTime->now is documented to default to UTC, while the
+# start/stop times elsewhere come from localtime() -- confirm that is intended.
+sub announce {
+    my ($msg) = @_;
+    return if !$verbose;
+    my $stamp = DateTime->now->strftime('%F %T');
+    print "$stamp $msg\n";
+}
+
if (!$result or $options{help}) {
pod2usage(0);
}
z => { 130 => 'z' },
},
);
+
+# Mapping of authority 008/11 "Subject heading system/thesaurus" codes
+# to the matching bib record second indicator (thesaurus) value.
+# Each authority code must appear only once: a repeated hash key silently
+# replaces the earlier pair, which previously made 'n' => '4' unreachable.
+my %AUTH_TO_BIB_IND2 = (
+    'a' => '0', # Library of Congress Subject Headings (ADULT)
+    'b' => '1', # Library of Congress Subject Headings (JUVENILE)
+    'c' => '2', # Medical Subject Headings
+    'd' => '3', # National Agricultural Library Subject Authority File
+    'n' => '4', # Source not specified
+    'k' => '5', # Canadian Subject Headings
+    'v' => '6', # Répertoire de vedettes-matière
+    'z' => '7'  # Other; source specified in subfield $2
+);
+
my $start_time = localtime();
+
+# Announce the start of the run (printed in verbose mode only); the wording
+# depends on whether an input file or a starting record ID was supplied.
if($input_file) {
- print "Start " . $start_time . " for " . scalar(@records) . " records.\n";
+ announce("Start $start_time for ".scalar(@records)." records");
+} elsif($start_id) {
+ announce("Start $start_time for record range: $start_id => $end_id");
} else {
- print "Start " . $start_time . " for records " . $start_id . " to " . $end_id . "\n";
+ announce("Start $start_time for all records");
+}
+
+# Given a set of authority record IDs and the indicator 2 (thesaurus)
+# value of a controlled bib field, returns the ID of the first
+# authority record in the set that matches the thesaurus.
+#
+# Params:
+#   $e           - editor object used to run the json_query
+#   $cfield_ind2 - indicator 2 value of the controlled bib field
+#   $auth_ids    - authority record ID(s) to choose from
+#
+# Returns the matching authority record ID, or undef when none matches.
+sub find_matching_auth_for_thesaurus {
+    my ($e, $cfield_ind2, $auth_ids) = @_;
+
+    # Without an indicator value there is nothing to match against.
+    return undef unless defined $cfield_ind2;
+
+    my $auth_008s = $e->json_query({
+        select => {afr => ['record', 'value']},
+        from   => 'afr',
+        where  => {'+afr' => {tag => '008', record => $auth_ids}}
+    });
+
+    for my $auth_008 (@$auth_008s) {
+        my $value = $auth_008->{value};
+
+        # Skip values too short to carry position 11.
+        next unless defined $value and length($value) > 11;
+
+        my $thesaurus = substr($value, 11, 1); # 008/11 -- zero based.
+
+        # A thesaurus code with no bib indicator equivalent can never
+        # match; skipping it avoids comparing undef against the indicator.
+        my $bib_ind2 = $AUTH_TO_BIB_IND2{$thesaurus};
+        next unless defined $bib_ind2;
+
+        return $auth_008->{record} if $bib_ind2 eq $cfield_ind2;
+    }
+
+    # Explicit undef kept so the return shape is unchanged for callers.
+    return undef;
}
+
foreach my $rec_id (@records) {
- # print "$rec_id\n";
+ announce("processing bib record $rec_id");
# State variable; was the record changed?
my $changed = 0;
# get the record
my $record = $e->retrieve_biblio_record_entry($rec_id);
next unless $record;
- # print Dumper($record);
try {
my $marc = MARC::Record->new_from_xml($record->marc());
foreach my $c_tag (@c_fields) {
my @c_subfields = keys %{$controllees{"$c_tag"}};
- # print "Field: $field subfields: ";
- # foreach (@subfields) { print "$_ "; }
+ announce "Inspecting controlled field $c_tag";
# Get the MARCXML from the record and check for controlled fields/subfields
my @bib_fields = ($marc->field($c_tag));
push @searches, map {{term => $_, subfield => $c_subfield}} @sf_values;
}
}
- # print Dumper(\%match_subfields);
next if !$match_tag;
- my @tags = ($match_tag);
+ announce("Searching for matches (auth tag=$match_tag): ".
+ Dumper(\@searches));
- # print "Controlling tag: $c_tag and match tag $match_tag\n";
- # print Dumper(\@tags, \@searches);
+ my @tags = ($match_tag);
# Now we've built up a complete set of matching controlled
# subfields for this particular field; let's check to see if
)->gather();
$session->disconnect();
- # print Dumper($validates);
# Protect against failed (error condition) search request
if (!$validates) {
print STDERR "Search for matching authority failed; record # $rec_id\n";
next if (!$changed);
}
-
- my $num_records = scalar(@$validates);
-
- # Only add linking if one or more was found, but we may have changed
- # the record already if in --refresh mode.
- if (scalar(@$validates) > 0) {
-
- # Iterate through the returned authority record IDs to delete any
- # matching $0 subfields already in the bib record
- foreach my $auth_zero (@$validates) {
- $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
- }
-
- for(my $i = 0; $i < $num_records; $i++)
- {
- # Okay, we have a matching authority control; time to
- # add the magical subfield 0. Use the first returned auth
- # record as a match.
- my $auth_id = @$validates[$i];
- my $auth_rec = $e->retrieve_authority_record_entry($auth_id);
- my $auth_marc = MARC::Record->new_from_xml($auth_rec->marc());
-
- my %Auth2BibIndicatorTwo = (
- 'a' => '0', # Library of Congress Subject Headings (ADULT)
- 'b' => '1', # Library of Congress Subject Headings (JUVENILE)
- 'c' => '2', # Medical Subject Headings
- 'd' => '3', # National Agricultural Library Subject Authority File
- 'n' => '4', # Source not specified
- 'k' => '5', # Canadian Subject Headings
- 'v' => '6', # Répertoire de vedettes-matière
- 'n' => '7', # Source specified in subfield $2
- 'z' => '8' # Other
- );
-
- my @marc_fieldz = $marc->fields();
- my $Auth_Indic = substr($auth_marc->field('008')->data(), 10, 1);
- while ((my $key, my $value) = each (%Auth2BibIndicatorTwo))
- {
- if($Auth_Indic eq $key)
- {
- foreach my $auth_field (@marc_fieldz)
- {
- if($auth_field->tag() ge 650 && $auth_field->tag() le 659) # 650-659
- {
- #print "Trying To Match Against: " . $Auth2BibIndicatorTwo{$key} . "\n";
- if($auth_field->indicator(2) eq $Auth2BibIndicatorTwo{$key})
- {
- #print "Found Match Between Bib Record(" . $rec_id . ") and Auth Record(" . $auth_marc->field('901')->subfield('c') . ")\n";
- if ($auth_marc->field('003')) {
- my $cni = $auth_marc->field('003')->data();
- $bib_field->add_subfields('0' => "($cni)$auth_id");
- $changed = 1;
- } else {
- print "Authority # $auth_id missing field '003'\n";
- next if (!$changed);
- }
- }
- }
- }
- }
- }
+
+ announce("Match query returned @$validates");
+
+ # No matches found. Nothing left to do for this field.
+ next if scalar(@$validates) == 0;
+
+ # Iterate through the returned authority record IDs to delete any
+ # matching $0 subfields already in the bib record
+ foreach my $auth_zero (@$validates) {
+ $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
+ }
+
+ # Find the best authority record to use for linking.
+
+ my $auth_id;
+ my $stop_looking = 0;
+
+ if ($bib_field->tag() ge 650 and $bib_field->tag() le 659) {
+ # If the controlled bib field has an indicator 2 value
+ # indicating the thesaurus, use the first authority
+ # record in the set with the same thesaurus.
+ # TODO: perhaps this step should be part of the
+ # validation API search call above.
+
+ my $cfield_ind2 = $bib_field->indicator(2);
+
+ if (defined $cfield_ind2) {
+
+ $auth_id = find_matching_auth_for_thesaurus(
+ $e, $cfield_ind2, $validates) || '';
+
+ announce("Thesaurus match for ind2=$cfield_ind2 returned $auth_id");
+
+ # If we don't find a matching authority record using
+ # the same thesaurus, we have nothing to link to.
+ $stop_looking = 1;
}
+ }
+
+ # No best record found above. Use the first in the list.
+ $auth_id = $validates->[0] unless $auth_id or $stop_looking;
+
+ # Don't exit here just because we have no $auth_id. The
+ # bib field could have been changed above in the cleanup /
+ # delete phase.
+
+ if ($auth_id) {
+ # Add the control number agency info from the matching
+ # authority record to the controlled bib field.
+
+ my $auth_003 = $e->json_query({
+ select => {afr => ['value']},
+ from => 'afr',
+ where => {'+afr' =>
+ {tag => '003', record => $auth_id}}
+ })->[0];
+
+ my $cni = $auth_003->{value} || '';
+ $bib_field->add_subfields('0' => "($cni)$auth_id");
+ $changed = 1;
+
+ announce("auth=$auth_id cni=$cni. It's a match!");
}
}
}
+
if ($changed) {
# print $marc->as_formatted();
}
}
my $end_time = localtime();
+
+# Announce the end of the run (printed in verbose mode only). Every branch
+# reports "Stop", mirroring the "Start" message printed before processing.
if($input_file) {
- print "----- Stop " . $end_time . " for " . scalar(@records) . " records.\n";
+ announce("Stop $end_time for ".scalar(@records)." records");
+} elsif($start_id) {
+ announce("Stop $end_time for record range: $start_id => $end_id");
} else {
- print "----- Stop " . $end_time . " for records " . $start_id . " to " . $end_id . "\n";
+ announce("Stop $end_time for all records");
}
-
__END__
=head1 NAME