From: Bill Erickson Date: Wed, 18 Nov 2015 17:21:49 +0000 (-0500) Subject: JBAS-980 readability; remove stale links X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=68597667269ae95951bc279c418cd852107442ba;p=working%2FEvergreen.git JBAS-980 readability; remove stale links If auth record A links to record B, but they use a different thesaurus, unlink them. When 2 records use a different thesaurus, exit early to avoid unnecessary processing. Various readability and style modifications. Signed-off-by: Bill Erickson --- diff --git a/KCLS/linking/authority_authority_linker.pl b/KCLS/linking/authority_authority_linker.pl index e190016211..912e2b7d47 100755 --- a/KCLS/linking/authority_authority_linker.pl +++ b/KCLS/linking/authority_authority_linker.pl @@ -17,6 +17,7 @@ use OpenILS::Application::AppUtils; use Data::Dumper; use Pod::Usage qw/ pod2usage /; +$Data::Dumper::Indent = 0; MARC::Charset->assume_unicode(1); my $acsaf_cache = {}; @@ -82,27 +83,32 @@ sub matchable_string { my ($start_id, $end_id); my $bootstrap = '/openils/conf/opensrf_core.xml'; my @records; +my $verbose; my %options; my $result = GetOptions( \%options, 'configuration=s' => \$bootstrap, 'record=i' => \@records, - 'all', 'help', 'debug', + 'all', 'help', 'start_id=i' => \$start_id, - 'end_id=i' => \$end_id + 'end_id=i' => \$end_id, + 'verbose' => \$verbose ); +sub announce { + my $msg = shift; + return unless $verbose; + print DateTime->now->strftime('%F %T') . " $msg\n"; +} + pod2usage(0) if not $result or $options{help}; -print "OpenSRF bootstrap and fieldmapper import...\n" if $options{debug}; OpenSRF::System->bootstrap_client(config_file => $bootstrap); Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); # must be loaded and initialized after the IDL is parsed -print "Loading CStoreEditor ...\n" if $options{debug}; - use OpenILS::Utils::CStoreEditor; OpenILS::Utils::CStoreEditor::init(); @@ -118,8 +124,11 @@ my $query = q{ sh2.atag AS field FROM authority.simple_heading sh1 JOIN authority.simple_heading sh2 USING (sort_value) - JOIN authority.control_set_authority_field af1 ON (sh1.atag = af1.id AND af1.main_entry IS NULL) - JOIN authority.control_set_authority_field af2 ON (sh2.atag = af2.id AND af2.main_entry IS NOT NULL AND af2.linking_subfield IS NOT NULL) + JOIN authority.control_set_authority_field af1 ON + (sh1.atag = af1.id AND af1.main_entry IS NULL) + JOIN authority.control_set_authority_field af2 ON + (sh2.atag = af2.id AND af2.main_entry IS NOT NULL + AND af2.linking_subfield IS NOT NULL) %s -- where clause here EXCEPT SELECT target, source, field FROM authority.authority_linking ) x GROUP BY 1 @@ -138,68 +147,119 @@ if (@records) { pod2usage(0); } -print "SQL, params: ", Dumper($query, \@bind_params), "\n" if $options{debug}; +announce("SQL, params: ", Dumper($query, \@bind_params)); my $dbh = connect_to_db; # dies if any problem $dbh->do('SET statement_timeout = 0'); my $sth = $dbh->prepare($query); -print "Executing query ...\n" if $options{debug}; +announce("Executing query ..."); $sth->execute(@bind_params); my $problems = 0; -my $start_time = localtime(); -print "Start " . $start_time . " for records " . $start_id . " to " . $end_id . "\n"; + +if (@records) { + announce("Processing ".scalar(@records)." records"); +} elsif ($start_id) { + announce("Processing records $start_id => $end_id"); +} else { + announce("Processing all records"); +} + +my $total_records = 0; while (my ($src, $links) = $sth->fetchrow_array) { - print "src: $src\n" if $options{debug}; + announce("--------------------"); + announce("Processing authority source record $src"); + $total_records++; try { - my $src_rec = $e->retrieve_authority_record_entry($src) or - die $e->die_event; + + my $src_rec = $e->retrieve_authority_record_entry($src); + if (!$src_rec) { + warn "Unable to load source authority record $src\n"; + next; + } + my $src_marc = MARC::Record->new_from_xml($src_rec->marc); - my $Auth_Source_Indic = substr($src_marc->field('008')->data(), 11, 1); + my $auth_src_thesaurus = substr($src_marc->field('008')->data(), 11, 1); + announce("Source record thesaurus value=$auth_src_thesaurus"); + my $changed = 0; for my $link (split ';', $links) { my ($target, $field_id) = split ',', $link; - print "target: $target, field_id: $field_id\n" if $options{debug}; + announce("Target: $target, field_id: $field_id"); + + my $target_rec = $e->retrieve_authority_record_entry($target); + if (!$target_rec) { + warn "Unable to load authority record $target. Skipping\n"; + next; + } - my $target_rec = $e->retrieve_authority_record_entry($target) or - die $e->die_event; my $target_marc = MARC::Record->new_from_xml($target_rec->marc); + + my $auth_target_thesaurus = + substr($target_marc->field('008')->data(), 11, 1); + announce("Target record thesaurus value=$auth_target_thesaurus"); + + # warn here, cleanup invalid links below + announce("Thesauri for source/target records do not match") + if $auth_src_thesaurus ne $auth_target_thesaurus; + my $cni = $target_marc->field('003')->data; - my $Auth_Target_Indic = substr($target_marc->field('008')->data(), 11, 1); - my $acsaf = get_acsaf($e, $field_id) or die $e->die_event; + my $acsaf = get_acsaf($e, $field_id); + if (!$acsaf) { + warn "No authority control set field found for $field_id. Skipping\n"; + next; + } for my $field ($src_marc->field($acsaf->tag)) { + + if ($auth_src_thesaurus ne $auth_target_thesaurus) { + my @zeros = $field->subfield('0'); + announce("Existing links: @zeros"); + if (grep { $_ =~ qr/\)$target$/ } @zeros) { + announce("Removing link(s) on ".$field->tag. + " for src/target thesaurus mismatch"); + $field->delete_subfield(code => '0', match => qr/\)$target$/); + $changed = 1; + } + next; + } + my $src_string = matchable_string( - $field, $acsaf->main_entry->display_sf_list, $acsaf->main_entry->joiner + $field, $acsaf->main_entry->display_sf_list, + $acsaf->main_entry->joiner ); - print("at field ", $acsaf->id, " (", $acsaf->tag, - "), trying to match '$src_string'...\n") if $options{debug}; + announce(sprintf( + "At field id=%s (tag=%s) / trying to match '%s'", + $acsaf->id, $acsaf->tag, $src_string)); my ($tfield) = $target_marc->field($acsaf->main_entry->tag); + if(defined $tfield) { my $target_string = matchable_string( - $tfield, $acsaf->main_entry->display_sf_list, $acsaf->main_entry->joiner + $tfield, $acsaf->main_entry->display_sf_list, + $acsaf->main_entry->joiner ); - if ($target_string eq $src_string and $Auth_Source_Indic eq $Auth_Target_Indic) { - print "got a match ...\n" if $options{debug}; + if ($target_string eq $src_string) { + announce("Got a match"); $field->update('0' => "($cni)$target"); + $changed = 1; } } - else { - } } } - $src_rec->marc(marcxml_eg($src_marc->as_xml_record)); - $e->xact_begin; - $e->update_authority_record_entry($src_rec) or - die $e->die_event; - $e->xact_commit; + if ($changed) { + announce("Updating authority record ".$src_rec->id); + $src_rec->marc(marcxml_eg($src_marc->as_xml_record)); + $e->xact_begin; + $e->update_authority_record_entry($src_rec) or die $e->die_event; + $e->xact_commit; + } } otherwise { my $err = shift; @@ -213,8 +273,16 @@ while (my ($src, $links) = $sth->fetchrow_array) { $problems++; } } -my $end_time = localtime(); -print "----- Stop " . $end_time . " for records " . $start_id . " to " . $end_id . "\n"; + +if (@records) { + announce("Processed records ". scalar(@records). + " records; processed=$total_records problems=$problems"); +} elsif ($start_id) { + announce("Processed records $start_id => $end_id; ". + "processed=$total_records; problems=$problems"); +} else { + announce("Processed all records; processed=$total_records; problems=$problems"); +} exit ($problems > 0);