#
# So, if the bib 650$a can be controlled by an auth 150$a, that maps to:
# 650 => { a => { 150 => 'a'}}
-my %controllees = (
+my %_controllees = (
100 => { a => { 100 => 'a' },
b => { 100 => 'b' },
c => { 100 => 'c' },
},
);
+
+my %controllees;
+sub get_controlled_fields {
+
+ # Fetch bib field configs for fields we want to link.
+ my $bib_fields = $e->search_authority_control_set_bib_field([
+ {id => {'<>' => undef}},
+ {flesh => 1, flesh_fields => {acsbf => ['authority_field']}}
+ ]);
+
+ for my $bib_field (@$bib_fields) {
+
+ # We only link bib 1XX, 6XX, 7XX, and 8XX fields
+ next unless $bib_field->tag =~ /^[1678]/;
+
+ # Ignore authority 18X fields
+ next if $bib_field->authority_field->tag =~ /^18/;
+
+ my @subfields = split(//, $bib_field->authority_field->sf_list);
+
+ # Remove some nonsensical subfield mappings...
+
+ @subfields = grep { $_ !~ /[vxyz]/ } @subfields
+ unless ($bib_field->tag =~ /^6/);
+
+ @subfields = grep { $_ !~ /[mors]/ } @subfields
+ unless ($bib_field->tag =~ /(130|600|610|630|730|830)/);
+
+ $controllees{$bib_field->tag} ||= {};
+ $controllees{$bib_field->tag}->{$_} =
+ {$bib_field->authority_field->tag => $_} for @subfields;
+ }
+}
+
+get_controlled_fields();
+
+for my $tag (sort keys %controllees) {
+# if (!exists $_controllees{$tag}) {
+# print "adding mapping for bib tag $tag\n";
+# next;
+# }
+
+ for my $subfield (sort keys %{$controllees{$tag}}) {
+ for my $authtag (sort keys %{$controllees{$tag}->{$subfield}}) {
+ next if
+ $_controllees{$tag} &&
+ $_controllees{$tag}{$subfield} &&
+ $_controllees{$tag}{$subfield}{$authtag};
+
+ print "add mapping for bib $tag \$$subfield ".
+ "=> authority $authtag \$$subfield\n";
+ }
+ }
+}
+
+for my $tag (sort keys %_controllees) {
+ if (!exists $controllees{$tag}) {
+ print "removing mapping for bib tag $tag\n";
+ next;
+ }
+ for my $subfield (sort keys %{$_controllees{$tag}}) {
+ print "removing mapping for bib tag $tag \$$subfield\n"
+ unless exists $controllees{$tag}{$subfield};
+ }
+}
+
# mapping of authority leader/11 "Subject heading system/thesaurus"
# to the matching bib record indicator
my %AUTH_TO_BIB_IND2 = (
$xml =~ s/\p{Cc}//go;
$xml = OpenILS::Application::AppUtils->entityize($xml);
+ if ($record->marc eq $xml) {
+ $KU->announce('DEBUG', "Skipping update on bib ".$record->id.
+ "... no changes were made");
+ return;
+ }
+
+ $KU->announce('DEBUG', "Applying updates to bib ".$record->id);
+
$record->marc($xml);
my $editor = OpenILS::Utils::CStoreEditor->new(xact=>1);
my $total = scalar(@records);
$KU->announce('INFO', "processing $total bib records");
-$slot ||= 'N/A';
+# for logging
+if ($slot_count && defined $slot) {
+ $slot++
+} else {
+ $slot = 'N';
+ $slot_count = 'A';
+}
+
my $start_time = time();
foreach my $rec_id (@records) {
$count++;
- $KU->announce('INFO', "processed $count of $total [slot=$slot]")
- if ($count % 1000) == 0;
-
- $KU->announce('DEBUG',
- "processing bib record $rec_id [$count of $total; slot=$slot]");
+ $KU->announce('DEBUG', "processing bib record $rec_id ".
+ "[$count of $total; slot=$slot/$slot_count]");
# State variable; was the record changed?
my $changed = 0;
my $record = $e->retrieve_biblio_record_entry($rec_id);
next unless $record && $record->deleted eq 'f';
- eval {
- my $marc = MARC::Record->new_from_xml($record->marc());
+ my $marc;
- # get the list of controlled fields
- my @c_fields = keys %controllees;
+ eval { $marc = MARC::Record->new_from_xml($record->marc); };
- foreach my $c_tag (@c_fields) {
- my @c_subfields = keys %{$controllees{"$c_tag"}};
+ if ($@) {
+ $KU->announce('WARNING', "Error parsing record $rec_id : $@");
+ import MARC::File::XML; # Reset SAX parser
+ next;
+ }
- # Get the MARCXML from the record and check for controlled fields/subfields
- my @bib_fields = ($marc->field($c_tag));
- foreach my $bib_field (@bib_fields) {
+ # get the list of controlled fields
+ my @c_fields = keys %controllees;
- my $sf0 = $bib_field->subfield('0') || '';
- my $is_fast_heading = is_fast_heading($bib_field);
+ foreach my $c_tag (@c_fields) {
+ my @c_subfields = keys %{$controllees{"$c_tag"}};
- if ($is_fast_heading && $sf0 =~ /\)fst/) {
- # fast heading looks OK. ignore it.
- $KU->announce('DEBUG', "Ignoring FAST heading field on ".
- "rec=$rec_id and tag=$c_tag \$0 $sf0");
- next;
- }
+ # Get the MARCXML from the record and check for controlled fields/subfields
+ my @bib_fields = ($marc->field($c_tag));
+ foreach my $bib_field (@bib_fields) {
- if ($sf0) {
- $KU->announce('DEBUG',
- "Removing \$0 $sf0 for rec=$rec_id and tag=$c_tag");
- $bib_field->delete_subfield(code => '0');
- $changed = 1;
- }
+ my $sf0 = $bib_field->subfield('0') || '';
+ my $is_fast_heading = is_fast_heading($bib_field);
- if ($is_fast_heading) {
- # Update record after potentially removing a bogus
- # fast heading above.
- update_record($record, $marc) if $changed;
-
- # We don't control fast headings, so there's nothing
- # left to do. Move on to the next field...
- $KU->announce('DEBUG',
- "No linking performed on FAST heading ".
- "field on rec=$rec_id and tag=$c_tag");
- next;
- }
+ if ($is_fast_heading && $sf0 =~ /\)fst/) {
+ # fast heading looks OK. ignore it.
+ $KU->announce('DEBUG', "Ignoring FAST heading field on ".
+ "rec=$rec_id and tag=$c_tag \$0 $sf0");
+ next;
+ }
- my %match_subfields;
- my $match_tag;
- my @searches;
- foreach my $c_subfield (@c_subfields) {
- my @sf_values = $bib_field->subfield($c_subfield);
- if (@sf_values) {
- # Give me the first element of the list of authority controlling tags for this subfield
- # XXX Will we need to support more than one controlling tag per subfield? Probably. That
- # will suck. Oh well, leave that up to Ole to implement.
- $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0];
- $match_tag = $match_subfields{$c_subfield};
- push @searches, map {{term => $_, subfield => $c_subfield}} @sf_values;
- }
- }
- next if !$match_tag;
+ if ($sf0) {
+ $KU->announce('DEBUG',
+ "Removing \$0 $sf0 for rec=$rec_id and tag=$c_tag");
+ $bib_field->delete_subfield(code => '0');
+ $changed = 1;
+ }
- $KU->announce('INFO',
- "Searching for matches on controlled field $c_tag ".
- "(auth tag=$match_tag): \n - ".Dumper(\@searches));
-
- my @tags = ($match_tag);
-
- # Now we've built up a complete set of matching controlled
- # subfields for this particular field; let's check to see if
- # we have a matching authority record
- my $session = OpenSRF::AppSession->create("open-ils.search");
- my $validates = $session->request("open-ils.search.authority.validate.tag.id_list",
- "tags", \@tags, "searches", \@searches
- )->gather();
- $session->disconnect();
-
-
- # Protect against failed (error condition) search request
- if (!$validates) {
- $KU->announce('WARNING',
- "Search for matching authority failed; record $rec_id");
- next unless $changed;
+ if ($is_fast_heading) {
+ # Update record after potentially removing a bogus
+ # fast heading above.
+ update_record($record, $marc) if $changed;
+
+ # We don't control fast headings, so there's nothing
+ # left to do. Move on to the next field...
+ $KU->announce('DEBUG',
+ "No linking performed on FAST heading ".
+ "field on rec=$rec_id and tag=$c_tag");
+ next;
+ }
+
+ my %match_subfields;
+ my $match_tag;
+ my @searches;
+ foreach my $c_subfield (@c_subfields) {
+ my @sf_values = $bib_field->subfield($c_subfield);
+ if (@sf_values) {
+ # Give me the first element of the list of authority controlling tags for this subfield
+ # XXX Will we need to support more than one controlling tag per subfield? Probably. That
+ # will suck. Oh well, leave that up to Ole to implement.
+ $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0];
+ $match_tag = $match_subfields{$c_subfield};
+ push @searches, map {{term => $_, subfield => $c_subfield}} @sf_values;
}
+ }
+ next if !$match_tag;
- $KU->announce('INFO', "Match query returned @$validates");
+ $KU->announce('INFO',
+ "Searching for matches on controlled field $c_tag ".
+ "(auth tag=$match_tag): \n - ".Dumper(\@searches));
+
+ my @tags = ($match_tag);
+
+ # Now we've built up a complete set of matching controlled
+ # subfields for this particular field; let's check to see if
+ # we have a matching authority record
+ my $session = OpenSRF::AppSession->create("open-ils.search");
+ my $validates = $session->request(
+ "open-ils.search.authority.validate.tag.id_list",
+ "tags", \@tags, "searches", \@searches
+ )->gather();
+
+ # Protect against failed (error condition) search request
+ if (!$validates) {
+ $KU->announce('WARNING',
+ "Search for matching authority failed; record $rec_id");
+ next;
+ }
- # No matches found. Nothing left to do for this field.
- next if scalar(@$validates) == 0;
+ $KU->announce('INFO', "Match query returned @$validates");
- # Iterate through the returned authority record IDs to delete any
- # matching $0 subfields already in the bib record
- foreach my $auth_zero (@$validates) {
- $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
- }
+ # No matches found. Nothing left to do for this field.
+ next if scalar(@$validates) == 0;
- # Find the best authority record to use for linking.
+ # Iterate through the returned authority record IDs to delete any
+ # matching $0 subfields already in the bib record
+ foreach my $auth_zero (@$validates) {
+ $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
+ }
- my $auth_leaders;
- if ($bib_field->tag =~ /^[167]/) {
- # For 1XX, 6XX, and 7XX bib fields, only link to
- # authority records whose leader/008 positions
- # 14 and 15 are coded to allow use as a name/author
- # or subject record, depending.
+ # Find the best authority record to use for linking.
- $auth_leaders = authority_leaders_008_14_15(
- $e, $bib_field->tag, $validates);
+ my $auth_leaders;
+ if ($bib_field->tag =~ /^[167]/) {
+ # For 1XX, 6XX, and 7XX bib fields, only link to
+ # authority records whose leader/008 positions
+ # 14 and 15 are coded to allow use as a name/author
+ # or subject record, depending.
- $validates = [map {$_->{record}} @$auth_leaders];
- }
+ $auth_leaders = authority_leaders_008_14_15(
+ $e, $bib_field->tag, $validates);
- my $auth_id;
+ $validates = [map {$_->{record}} @$auth_leaders];
+ }
- if ($bib_field->tag() =~ /^65[015]/) {
- # Using the indicator-2 value from the controlled bib
- # field, find the first authority in the list of matches
- # that uses the same thesaurus. If no such authority
- # is found, no matching occurs.
- # TODO: perhaps this step should be part of the
- # validation API search call above.
-
- $auth_id = find_matching_auth_for_thesaurus(
- $e, $bib_field, $auth_leaders) || '';
+ my $auth_id;
- } else {
+ if ($bib_field->tag() =~ /^65[015]/) {
+ # Using the indicator-2 value from the controlled bib
+ # field, find the first authority in the list of matches
+ # that uses the same thesaurus. If no such authority
+ # is found, no matching occurs.
+ # TODO: perhaps this step should be part of the
+ # validation API search call above.
+
+ $auth_id = find_matching_auth_for_thesaurus(
+ $e, $bib_field, $auth_leaders) || '';
- # For all other controlled fields, use the first
- # authority record in the result set.
- $auth_id = $validates->[0];
- }
+ } else {
- # Don't exit here just because we have no $auth_id. The
- # bib field could have been changed above in the cleanup /
- # delete phase.
-
- if ($auth_id) {
- # Add the auth ID and control number agency info from the
- # matching authority record to the controlled bib field.
- $changed = 1;
- $bib_field->add_subfields('0' => "($CNI)$auth_id");
- $KU->announce('INFO',
- "auth=$auth_id cni=$CNI. It's a match!");
- }
+ # For all other controlled fields, use the first
+ # authority record in the result set.
+ $auth_id = $validates->[0];
}
- }
- update_record($record, $marc) if $changed;
- };
+ # Don't exit here just because we have no $auth_id. The
+ # bib field could have been changed above in the cleanup /
+ # delete phase.
+
+ if ($auth_id) {
+ # Add the auth ID and control number agency info from the
+ # matching authority record to the controlled bib field.
+ $changed = 1;
+ $bib_field->add_subfields('0' => "($CNI)$auth_id");
+ $KU->announce('INFO',
+ "auth=$auth_id cni=$CNI. It's a match!");
+ }
- if ($@) {
- $KU->announce('WARNING', "Error linking record $rec_id : $@");
- # Reset SAX parser so that one bad record doesn't kill the entire process
- import MARC::File::XML;
- }
+ } # for each bib field with selected tag
+
+ } # for each controlled bib tag
+
+ update_record($record, $marc) if $changed;
}
my $end_time = time();