use OpenSRF::System;
use OpenILS::Utils::Fieldmapper;
use OpenSRF::Utils::SettingsClient;
+use OpenILS::Utils::Normalize qw(naco_normalize);
use Data::Dumper;
use OpenILS::Application::AppUtils;
use OpenILS::Utils::KCLSScriptUtil;
+my $U = 'OpenILS::Application::AppUtils';
my $KU = 'OpenILS::Utils::KCLSScriptUtil';
$ENV{OSRF_LOG_CLIENT} = 1;
where => {'+afr' => {tag => '008', record => $auth_ids}}
});
+ # Sort the auth_leaders list to match the order of the original
+ # auth_ids, since those are prioritized by how closely the heading matches.
+ my @tmp_leaders = @$auth_leaders;
+ $auth_leaders = [];
+ for my $auth_id (@$auth_ids) {
+ my ($leader) = grep {$_->{record} eq $auth_id} @tmp_leaders;
+ push(@$auth_leaders, $leader) if $leader;
+ }
+
my $index;
$index = 14 if $bib_tag =~ /^[17]/; # author/name record
$index = 15 if $bib_tag =~ /^6/; # subject record
}
}
+# Find authority records that may control the given bib field.
+#
+# Params:
+#   $bib_field - field object from the bib record; must support tag()
+#                and subfields() returning [code, value] pairs
+#                (presumably a MARC::Field -- confirm against caller).
+#
+# Returns an arrayref of authority record IDs, ordered from the longest
+# (most specific) heading match to the shortest.  Returns an empty
+# arrayref when the field has no controlled subfields or when any
+# heading query fails.
+#
+# Relies on %controllees, $e, $KU and $U declared elsewhere in the file.
+sub find_potential_auth_matches {
+    my ($bib_field) = @_;
+
+    my $bib_tag = $bib_field->tag;
+    my $controlled = $controllees{$bib_tag} || {};
+
+    # Iterate over the subfields within the record, instead
+    # of the known controlled subfields, to retain field order.
+    my @searches;
+    my $match_auth_tag;
+    for my $r_subfield ($bib_field->subfields) {
+        my ($code, $value) = @$r_subfield;
+
+        # skip uncontrolled subfields.
+        next unless exists $controlled->{$code};
+
+        # Assume each bib field is controlled by one authority field.
+        $match_auth_tag = (keys %{$controlled->{$code}})[0];
+
+        # Use the value of this specific occurrence.  Asking the field
+        # for every value of $code here would add repeated subfields
+        # once per occurrence and scramble the field order.
+        push(@searches, {term => $value, subfield => $code});
+    }
+
+    return [] unless $match_auth_tag;
+
+    # KCLS JBAS-1470
+    # Find all authority records whose simple_heading is (essentially)
+    # a left-anchored substring match of the normalized bib heading.
+    # Sort by longest to shortest match.  Include the shorter matches
+    # because a longer match may later be discarded, e.g. because it
+    # uses a different thesaurus.
+    #
+    # We don't exactly want a substring match, more like a sub-tag
+    # match.  A straight substring match on the heading is both slow
+    # (at the DB level) and could result in partial value matches, like
+    # 'smith' vs. 'smithsonian', which we don't want.
+
+    if (1) {
+
+        # Normalize each term once up front; the loop below only drops
+        # trailing parts, so there is no need to re-normalize per query.
+        my @heading_parts;
+        for my $search (@searches) {
+            push(@heading_parts,
+                " " . $search->{subfield} . " " .
+                naco_normalize($search->{term}));
+        }
+
+        my @auth_ids;
+        while (@heading_parts) {
+            # Heading format: "<auth tag> <code> <value> <code> <value> ..."
+            my $heading = $match_auth_tag . join('', @heading_parts);
+
+            $KU->announce('DEBUG',
+                "Authority sub-heading search for: $heading");
+
+            my $ids = $e->search_authority_record_entry(
+                {simple_heading => $heading, deleted => 'f'},
+                {idlist => 1}
+            );
+
+            # Don't let a single cstore query failure kill the whole
+            # process, but do leave a trace of it in the logs.
+            unless ($ids) {
+                $KU->announce('WARNING',
+                    "Authority sub-heading search failed for: $heading");
+                return [];
+            }
+
+            $KU->announce('DEBUG',
+                "Authority heading search returned @$ids") if @$ids;
+
+            push(@auth_ids, @$ids);
+
+            # Drop the trailing subfield and retry with a shorter heading.
+            pop(@heading_parts);
+        }
+
+        return \@auth_ids;
+    }
+
+    # Legacy bib-to-auth lookup routine
+    # SHOULD NOT GET HERE. KEEPING FOR TESTS.
+
+    $KU->announce('INFO',
+        "Searching for matches on controlled field $bib_tag ".
+        "(auth tag=$match_auth_tag): \n - ".Dumper(\@searches));
+
+    my $auth_ids = $U->simplereq(
+        'open-ils.search',
+        "open-ils.search.authority.validate.tag.id_list",
+        "tags", [$match_auth_tag], "searches", \@searches
+    );
+
+    return $auth_ids ? $auth_ids : [];
+}
+
my $count = 0;
my $total = scalar(@records);
-$KU->announce('INFO', "processing $total bib records");
# for logging
if ($slot_count && defined $slot) {
my @c_fields = keys %controllees;
foreach my $c_tag (@c_fields) {
- my @c_subfields = keys %{$controllees{"$c_tag"}};
- # Get the MARCXML from the record and check for controlled fields/subfields
my @bib_fields = ($marc->field($c_tag));
foreach my $bib_field (@bib_fields) {
next;
}
- my %match_subfields;
- my $match_tag;
- my @searches;
- foreach my $c_subfield (@c_subfields) {
- my @sf_values = $bib_field->subfield($c_subfield);
- if (@sf_values) {
- # Give me the first element of the list of authority controlling tags for this subfield
- # XXX Will we need to support more than one controlling tag per subfield? Probably. That
- # will suck. Oh well, leave that up to Ole to implement.
- $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0];
- $match_tag = $match_subfields{$c_subfield};
- push @searches, map {{term => $_, subfield => $c_subfield}} @sf_values;
- }
- }
- next if !$match_tag;
-
- $KU->announce('INFO',
- "Searching for matches on controlled field $c_tag ".
- "(auth tag=$match_tag): \n - ".Dumper(\@searches));
-
- my @tags = ($match_tag);
-
- # Now we've built up a complete set of matching controlled
- # subfields for this particular field; let's check to see if
- # we have a matching authority record
- my $session = OpenSRF::AppSession->create("open-ils.search");
- my $validates = $session->request(
- "open-ils.search.authority.validate.tag.id_list",
- "tags", \@tags, "searches", \@searches
- )->gather();
-
- # Protect against failed (error condition) search request
- if (!$validates) {
- $KU->announce('WARNING',
- "Search for matching authority failed; record $rec_id");
- next;
- }
+ my $validates = find_potential_auth_matches($bib_field);
$KU->announce('INFO', "Match query returned auth rec(s): @$validates");