From 107d8a6d7d2190ff9ead168f518607ecb828fb3c Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Fri, 18 Dec 2015 12:46:57 -0800 Subject: [PATCH] JBAS-980 Make auth-2-auth more aggressive Remove all existing links for each processed auth-2-auth field before rebuilding the links. This lets us remove bogus links. Signed-off-by: Bill Erickson --- KCLS/linking/authority_authority_linker.pl | 43 +++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/KCLS/linking/authority_authority_linker.pl b/KCLS/linking/authority_authority_linker.pl index 97e6f010c8..0aecb2f791 100755 --- a/KCLS/linking/authority_authority_linker.pl +++ b/KCLS/linking/authority_authority_linker.pl @@ -51,6 +51,8 @@ my $db_host = $ENV{PGHOST} || 'localhost'; my $db_port = $ENV{PGPORT} || '5432'; my $db_user = $ENV{PGDATABASE} || 'evergreen'; my $db_pass = $ENV{PGPASSWORD}; +my $links_removed = 0; +my $links_added = 0; my %options; my $result = GetOptions( @@ -101,7 +103,9 @@ my $query = q{ (sh2.atag = af2.id AND af2.main_entry IS NOT NULL AND af2.linking_subfield IS NOT NULL) %s -- where clause here - EXCEPT SELECT target, source, field FROM authority.authority_linking + -- Ignore authority.authority_linking rows since we want to + -- rebuild all links, which may mean deleting bogus links. + -- EXCEPT SELECT target, source, field FROM authority.authority_linking ) x GROUP BY 1 }; @@ -161,6 +165,8 @@ while (my ($src, $links) = $sth->fetchrow_array) { for my $link (split ';', $links) { my ($target, $field_id) = split ',', $link; + next if $target eq $src_rec->id; + announce("Target: $target, field_id: $field_id"); my $target_rec = $e->retrieve_authority_record_entry($target); @@ -173,11 +179,13 @@ while (my ($src, $links) = $sth->fetchrow_array) { my $auth_target_thesaurus = substr($target_marc->field('008')->data(), 11, 1); + announce("Target record thesaurus value=$auth_target_thesaurus"); - # warn here, cleanup invalid links below - announce("Thesauri for source/target records do not match") - if $auth_src_thesaurus ne $auth_target_thesaurus; + if ($auth_src_thesaurus ne $auth_target_thesaurus) { + announce("Thesauri for source/target records do not match. Skipping.."); + next; + } my $cni = $target_marc->field('003')->data; my $acsaf = get_acsaf($e, $field_id); @@ -186,19 +194,18 @@ while (my ($src, $links) = $sth->fetchrow_array) { next; } + # start by removing all existing links for the current tag for my $field ($src_marc->field($acsaf->tag)) { - - if ($auth_src_thesaurus ne $auth_target_thesaurus) { - my @zeros = $field->subfield('0'); - announce("Existing links: @zeros"); - if (grep { $_ =~ qr/\)$target$/ } @zeros) { - announce("Removing link(s) on ".$field->tag. - " for src/target thesaurus mismatch"); - $field->delete_subfield(code => '0', match => qr/\)$target$/); - $changed = 1; - } - next; + if (my $val = $field->subfield('0')) { + announce("Removing existing subfield 0 : $val"); + $field->delete_subfield(code => '0'); + $changed = 1; + $links_removed++; } + } + + # rebuild the links for the current tag + for my $field ($src_marc->field($acsaf->tag)) { my $src_string = matchable_string( $field, $acsaf->main_entry->display_sf_list, @@ -212,6 +219,7 @@ while (my ($src, $links) = $sth->fetchrow_array) { my ($tfield) = $target_marc->field($acsaf->main_entry->tag); if(defined $tfield) { + my $target_string = matchable_string( $tfield, $acsaf->main_entry->display_sf_list, $acsaf->main_entry->joiner @@ -221,6 +229,7 @@ while (my ($src, $links) = $sth->fetchrow_array) { announce("Got a match"); $field->update('0' => "($cni)$target"); $changed = 1; + $links_added++; } } } @@ -257,6 +266,10 @@ if (@records) { announce("Processed all records; processed=$total_records; problems=$problems"); } +announce("links removed: $links_removed"); +announce("links added: $links_added"); +announce("delta added: ".($links_added - $links_removed)); + exit ($problems > 0); __END__ -- 2.11.0