--- /dev/null
+#!/usr/bin/perl
+
+# ---------------------------------------------------------------
+# Copyright © 2019 MOBIUS
+# Blake Graham-Henderson <blake@mobiusconsortium.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# ---------------------------------------------------------------
+
+# These Perl modules are required in addition to standard Evergreen installations:
+# install Email::MIME
+# install Email::Sender::Simple
+# install Digest::SHA1
+# install REST::Client
+# install pQuery
+
+use MARC::Record;
+use MARC::File;
+use MARC::File::XML (BinaryEncoding => 'utf8');
+use MARC::File::USMARC;
+use File::Path qw(make_path remove_tree);
+use strict;
+use Data::Dumper;
+use DateTime;
+use utf8;
+use Encode;
+use LWP::Simple;
+use OpenILS::Application::AppUtils;
+use DateTime::Format::Duration;
+use Digest::SHA1;
+use File::stat;
+use File::Copy;
+use Getopt::Long;
+use REST::Client;
+use LWP::UserAgent;
+use Digest::SHA qw(hmac_sha256_base64);
+use HTML::Entities;
+use POSIX;
+use DBD::Pg;
+use Email::MIME;
+use Email::Sender::Simple;
+
+# ---- Command-line switches (populated by GetOptions below) ----
+our $configFile;
+our $debug = 0;
+our $syncnines = 0;
+our $reprocess = -1;
+our $searchDeepMatch = 0;
+our $match901c = 0;
+our $reportOnly = -1;
+our $continueJob = 0;
+our $resha = 0;
+
+
+GetOptions (
+"config=s" => \$configFile,
+"reprocess=s" => \$reprocess,
+"search_deep" => \$searchDeepMatch,
+"report_only=s" => \$reportOnly,
+"continue=s" => \$continueJob,
+"match_901c" => \$match901c,
+"resha" => \$resha,
+"debug" => \$debug,
+"syncnines" => \$syncnines,
+)
+or die("Error in command line arguments\nYou can specify
+--config configfilename [Path to the config file - required]
+--reprocess jobID [Optional: Skip the import process and re-process provided job ID]
+--search_deep [Optional: Cause the software to spend more time searching for BIB matches]
+--match_901c [Optional: Cause the software to match existing BIBS using the incoming MARC 901c]
+--report_only jobID [Optional: Only email the report for a previous job. Provide the job ID]
+--continue jobID [Optional: Cause the software to finish an old job that was not finsihed]
+--resha flag [Optional: Cause the software to loop through all of the previously imported bibs and recalculate the matching sha]
+--debug flag [Cause more output and logging]
+--syncnines flag [Instead of importing records, this flag causes the software to syncronize the \$9's on already imported bibs]
+\n");
+
+# NOTE(review): the config file is read here, BEFORE the missing---config
+# check further down; mobutil_readConfFile may receive undef.
+ our $conf = mobutil_readConfFile($configFile);
+# ---- Shared state used throughout the script ----
+ our %conf;
+ our $jobid = -1;
+ our $log;
+ our $archivefolder;
+ our $importSourceName;
+ our $importBIBTagName;
+ our $importBIBTagNameDB;
+ our $remotefolder;
+ our $dbHandler;
+ our $domainname = '';
+ our $bibsourceid = -1;
+ our $recurseFTP = 1;
+ our $lastDateRunFilePath;
+ our $cert;
+ our $certKey;
+ our @shortnames;
+ our %marcEdits = ();
+
+# ---- Main flow: validate config, stage files, import, remove, report ----
+ if(!$configFile)
+ {
+ print "Please specify a config file\n";
+ exit;
+ }
+
+ if($conf)
+ {
+ %conf = %{$conf};
+ if ($conf{"logfile"})
+ {
+ my $dt = DateTime->now(time_zone => "local");
+ my $fdate = $dt->ymd;
+ my $ftime = $dt->hms;
+ my $dateString = "$fdate $ftime";
+ $log = $conf->{"logfile"};
+ # logfile_truncFile($log, "");
+ logfile_addLogLine($log," ---------------- Script Starting ---------------- ");
+ # Every directive below must be present in the config file or the run aborts.
+ my @reqs = ("server","login","password","remotefolder","sourcename","tempspace","archivefolder","dbhost","db","dbuser","dbpass","port","participants","logfile","incomingmarcfolder","recordsource","ignorefiles","removalfiles","bibtag");
+
+ # There are some special directives required when cloudlibrary is selected
+ push(@reqs, ("lastdatefile","certpath","certkeypath")) if( lc ($conf{"recordsource"}) eq 'cloudlibrary');
+
+ my $valid = 1;
+ my $errorMessage="";
+ for my $i (0..$#reqs)
+ {
+ if(!$conf{@reqs[$i]})
+ {
+ logfile_addLogLine($log,"Required configuration missing from conf file");
+ logfile_addLogLine($log,@reqs[$i]." required");
+ $valid = 0;
+ }
+ }
+
+ $lastDateRunFilePath = $conf{"lastdatefile"} if( lc ($conf{"recordsource"}) eq 'cloudlibrary');
+ $cert = $conf{"certpath"} if( lc ($conf{"recordsource"}) eq 'cloudlibrary');
+ $certKey = $conf{"certkeypath"} if( lc ($conf{"recordsource"}) eq 'cloudlibrary');
+
+ $archivefolder = $conf{"archivefolder"};
+ $importSourceName = $conf{"sourcename"};
+ $remotefolder = $conf{"remotefolder"};
+ $importBIBTagName = $conf{"bibtag"};
+ # DB-safe variant of the bib tag: whitespace becomes hyphens.
+ $importBIBTagNameDB = $conf{"bibtag"};
+ $importBIBTagNameDB =~ s/\s/\-/g;
+ $domainname = $conf{"domainname"} || '';
+ $recurseFTP = $conf{"recurse"} || 1;
+ $recurseFTP = lc $recurseFTP;
+ $recurseFTP = ($recurseFTP eq 'n' ? 0 : 1);
+
+ if(!(-d $archivefolder))
+ {
+ $valid = 0;
+ print "Sorry, the archive folder does not exist: $archivefolder\n";
+ $errorMessage = "Sorry, the archive folder does not exist: $archivefolder";
+ }
+ #remove trailing slash
+ $archivefolder =~ s/\/$//;
+
+ # Seed %marcEdits with one empty list per supported edit verb.
+ my @editArray = ('add','replace','remove','removesubfield');
+ foreach(@editArray)
+ {
+ my @a = ();
+ $marcEdits{$_} = \@a;
+ undef @a;
+ }
+
+ runSyncNines() if($syncnines); # this routine will exit, this is the end of the main body of execution
+
+ parseMARCEdits();
+
+ my @files;
+
+ if($valid)
+ {
+ my @marcOutputRecords;
+ my @marcOutputRecordsRemove;
+ my $removalsViaMARC = 1;
+ @shortnames = split(/,/,$conf{"participants"});
+ for my $y(0.. $#shortnames)
+ {
+ @shortnames[$y]=mobutil_trim(@shortnames[$y]);
+ }
+ eval{dbhandler_setupConnection($conf{"db"},$conf{"dbhost"},$conf{"dbuser"},$conf{"dbpass"},$conf{"port"});};
+ if ($@)
+ {
+ print "Could not establish a connection to the database\n";
+ alertErrorEmail("Could not establish a connection to the database");
+ exit 1;
+ }
+
+ setupSchema();
+
+ reCalcSha() if $resha;
+
+ # Choose the work mode: re-process an old job, continue an unfinished
+ # one, re-run a report, or import freshly downloaded files.
+ my $doSomething = 0;
+
+ if($reprocess != -1)
+ {
+ $bibsourceid = getbibsource();
+ $jobid = $reprocess;
+ $doSomething = resetJob($reprocess);
+ }
+ elsif($continueJob)
+ {
+ $bibsourceid = getbibsource();
+ # Make sure the provided job exists
+ my $query = "select id from bib_magic.import_status where job = $continueJob and status=\$\$new\$\$";
+ updateJob("Processing",$query);
+ my @results = @{dbhandler_query($query)};
+ $jobid = $continueJob if $#results > -1;
+ $doSomething = 1 if $#results > -1;
+ my $t = $#results;
+ $t++; # 0 based to 1 based
+ print "Nothing unfinished for job $continueJob. Nothing to do.\n" if $#results < 0;
+ print "Continuing job $continueJob with $t thing(s) to process\n" if $#results > -1;
+ undef @results;
+ }
+ elsif($reportOnly == -1) ## Make sure we are not just running reports
+ {
+ @files = @{getmarc()};
+
+ if($#files!=-1)
+ {
+ $bibsourceid = getbibsource();
+ $jobid = createNewJob('processing');
+ if($jobid==-1)
+ {
+ $errorMessage = "Could not create a new job number in the schema - ";
+ logfile_addLogLine($log,$errorMessage);
+ deleteFiles(\@files);
+ # NOTE(review): the next line is a no-op — the concatenation result is
+ # discarded; '.=' was almost certainly intended to append file names.
+ $errorMessage."\n$_" foreach(@files);
+ alertErrorEmail($errorMessage);
+ exit;
+ }
+ $doSomething = prepFiles(\@files);
+ }
+ }
+ $doSomething = 1 if $reportOnly != -1;
+ $jobid = $reportOnly if $reportOnly != -1;
+
+ if($doSomething) # File prep resulted in stuff we need to do or it's a re-process
+ {
+ if ($reportOnly == -1)
+ {
+ # Send a comfort message explaining that we have received the files and it might take some time before
+ # they receive the finished message. Only when it's type folder and deep match searching is configured.
+ sendWelcomeMessage(\@files);
+
+ my $startTime = DateTime->now(time_zone => "local");
+ my $displayInterval = 100;
+ my $recalcTimeInterval = 500;
+ my $bibsImported = 0;
+ my $authsImported = 0;
+ ## Bib Imports
+ # NOTE(review): $query/@results/$count are re-declared with 'my' in this
+ # same scope for each phase below — harmless, but emits warnings.
+ my $query = "SELECT id,title,z01,sha1,marc_xml,filename from bib_magic.import_status where type=\$\$importbib\$\$ and job=$jobid and status=\$\$new\$\$ order by id";
+ updateJob("Processing",$query);
+ my @results = @{dbhandler_query($query)};
+ my $count = 0;
+ my $totalCount = 0;
+ $bibsImported = 1 if ($#results > -1);
+ foreach(@results)
+ {
+ my @row = @{$_};
+ importMARCintoEvergreen(@row[0],@row[1],@row[2],@row[3],@row[4]);
+ $count++;
+ $totalCount++;
+ ($count, $startTime) = displayRecPerSec("Import Bibs", $count, $startTime, $displayInterval, $recalcTimeInterval, $totalCount, $#results);
+ }
+ undef @results;
+
+ ## Authority Imports
+ my $query = "SELECT filename from bib_magic.import_status where type=\$\$importauth\$\$ and job=$jobid and status=\$\$new\$\$ group by 1";
+ updateJob("Processing",$query);
+ my @results = @{dbhandler_query($query)};
+ my $count = 0;
+ $startTime = DateTime->now(time_zone => "local");
+ foreach(@results)
+ {
+ my @row = @{$_};
+ importAuthority(@row[0]);
+ $count++;
+ $totalCount++;
+ ($count, $startTime) = displayRecPerSec("Import Authority", $count, $startTime, $displayInterval, $recalcTimeInterval, $totalCount, $#results);
+ $authsImported = 1 if !$authsImported;
+ }
+ undef @results;
+
+ ## Removals
+ my $query = "SELECT id,title,z01,sha1,marc_xml,filename,type from bib_magic.import_status where type~\$\$remov\$\$ and job=$jobid and status=\$\$new\$\$ order by type,id";
+ updateJob("Processing",$query);
+ my @results = @{dbhandler_query($query)};
+ my $count = 0;
+ $startTime = DateTime->now(time_zone => "local");
+ foreach(@results)
+ {
+ my @row = @{$_};
+ # isbn_remove rows came from a csv/tsv list, not a MARC file.
+ my $removalViaMARAC = 1;
+ $removalViaMARAC = 0 if @row[6] eq 'isbn_remove';
+ print "Removal Type: @row[6] isbnRemoval = $removalViaMARAC\n" if $debug;
+ removeBibsEvergreen(@row[0],@row[1],@row[2],@row[3],@row[4],$removalViaMARAC);
+ $count++;
+ $totalCount++;
+ ($count, $startTime) = displayRecPerSec("Bib Removal", $count, $startTime, $displayInterval, $recalcTimeInterval, $totalCount, $#results);
+ }
+ undef @results;
+
+ ## Authority linker when there were bibs imported AND Auth imports
+ if($bibsImported && $authsImported && $conf{'authority_link_script_cmd'})
+ {
+ my $query = "SELECT bib from bib_magic.import_status where type=\$\$importbib\$\$ and job=$jobid and bib is not null order by bib";
+ updateJob("Processing",$query);
+ my @results = @{dbhandler_query($query)};
+ my $count = 0;
+ $startTime = DateTime->now(time_zone => "local");
+ foreach(@results)
+ {
+ my @row = @{$_};
+ my $cmd = $conf{'authority_link_script_cmd'} . ' ' . @row[0];
+ logfile_addLogLine($log,$cmd);
+ system($cmd);
+ $count++;
+ $totalCount++;
+ ($count, $startTime) = displayRecPerSec("Authority Linker", $count, $startTime, $displayInterval, $recalcTimeInterval, $totalCount, $#results);
+ }
+ undef @results;
+ }
+
+ }
+
+ # Build and email the summary report for this job.
+ my $report = runReports();
+ my $duration = calculateTimeDifference($dt);
+
+ my $body = "Hi Team,\r\n\r\n";
+ $body .= "Thanks for your file(s). It took me some time to work on it:\r\n$duration\r\n\r\n";
+ $body .= "I've digested the file(s):\r\n\r\n";
+ $body .= $report;
+ $body .= "\r\n\r\nImport Type: ".$conf{"recordsource"};
+ $body .= "\r\nConnected to: ".$conf{"server"} if($conf{"recordsource"} ne 'folder');
+ $body .= "\r\nYours Truly,\r\nThe friendly server";
+
+ updateJob("Processing","Email sending:\n$body");
+
+ my @tolist = ($conf{"alwaysemail"});
+ my $email = email_setup($conf{"fromemail"},\@tolist,$valid,1,\%conf);
+ email_send($email,"Evergreen Electronic Import Summary - $importBIBTagName Job # $jobid",$body);
+
+ updateJob("Completed","");
+ }
+
+ logfile_addLogLine($log," ---------------- Script Ending ---------------- ");
+ }
+ else
+ {
+ print "Config file does not define some of the required directives. See Log for details\n";
+ }
+ }
+ else
+ {
+ print "Config file: 'logfile' directive is required\n";
+ }
+ }
+
+# --syncnines mode: walk every already-imported bib that matches this
+# import source, re-synchronize the 856 $9 participation codes against
+# the configured participant shortnames, write back any changed
+# records, and exit. Never returns.
+sub runSyncNines
+{
+ eval{dbhandler_setupConnection($conf{"db"},$conf{"dbhost"},$conf{"dbuser"},$conf{"dbpass"},$conf{"port"});};
+ if ($@)
+ {
+ print "Could not establish a connection to the database\n";
+ alertErrorEmail("Could not establish a connection to the database");
+ exit 1;
+ }
+ @shortnames = split(/,/,$conf{"participants"});
+ for my $y(0.. $#shortnames)
+ {
+ @shortnames[$y]=mobutil_trim(@shortnames[$y]);
+ }
+ my @relatedRecords = @{getRelatedBibList($dbHandler)};
+ # print "done gathering\n";
+ my @updatethese;
+ foreach(@relatedRecords)
+ {
+ my $marc = @{$_}[1];
+ my $id = @{$_}[0];
+ # Overwrite the 10th leader character with 'a' before parsing —
+ # presumably forcing the Unicode coding flag so MARC::File::XML
+ # accepts the record; TODO confirm.
+ $marc =~ s/(<leader>.........)./${1}a/;
+ my $marcobject = MARC::Record->new_from_xml($marc);
+ # print "adding 9s $id\n";
+ $marcobject = sync9s($marcobject);
+ my $thisXML = convertMARCtoXML($marcobject);
+ # Compare from <leader> onward so XML prolog differences are ignored.
+ my $before = substr($marc,index($marc, '<leader>'));
+ my $after = substr($thisXML,index($thisXML, '<leader>'));
+ if($before ne $after)
+ {
+ my @temp = ( $id, $thisXML );
+ push @updatethese, [@temp];
+ # print "adding to update list\n";
+ # logfile_addLine($log,"These are different now $id");
+ # logfile_addLine($log,"$marc\r\nbecame\r\n$thisXML");
+ }
+ undef $marc;
+ }
+ foreach(@updatethese)
+ {
+ my @both = @{$_};
+ my $bibid = @both[0];
+ my $marc = @both[1];
+ # Log each affected 856 URL to bib_magic.nine_sync, then store the
+ # updated MARC.
+ my @urls = @{getAffectedURLs($marc)};
+ foreach(@urls)
+ {
+ recordSyncToDB($dbHandler,$conf{"participants"},$bibid,$_);
+ }
+ # logfile_addLine($log,"UPDATE BIBLIO.RECORD_ENTRY SET MARC=\$1 WHERE ID=$bibid");
+ # logfile_addLine($log,$marc);
+ my $query = "UPDATE biblio.record_entry SET marc=\$1 WHERE id=$bibid";
+ my @values = ($marc);
+ dbhandler_updateWithParameters($query,\@values);
+ logfile_addLine($log,"$bibid\thttp://$domainname/eg/opac/record/$bibid?query=yellow;qtype=keyword;locg=4;expand=marchtml#marchtml\thttp://$domainname/eg/opac/record/$bibid?query=yellow;qtype=keyword;locg=4;expand=marchtml#marchtml");
+ }
+
+ logfile_addLogLine($log," ---------------- Script Ending ---------------- ");
+ exit;
+}
+
+# Collect the $u URLs from every 856 field that decideRelated856()
+# judges to belong to this import source. Takes MARCXML text and
+# returns a reference to a (possibly empty) array of URL strings.
+sub getAffectedURLs
+{
+    my $marcxml = shift;
+    my $record = MARC::Record->new_from_xml($marcxml);
+    my @urls;
+    foreach my $field ($record->field('856'))
+    {
+        next unless decideRelated856($field);
+        # subfield() in list context returns every $u on the field
+        push @urls, $field->subfield('u');
+    }
+    return \@urls;
+}
+
+# Record one synchronized URL in bib_magic.nine_sync.
+# Args: db handle (accepted for call-signature symmetry; the module-level
+# handler is what dbhandler_updateWithParameters uses), participant
+# shortname list (stored verbatim), bib id, URL.
+sub recordSyncToDB
+{
+    my ($dbHandler, $shortnames, $bibid, $url) = @_;
+    my $sql = "INSERT INTO bib_magic.nine_sync(record,nines_synced,url) VALUES(\$1,\$2,\$3)";
+    dbhandler_updateWithParameters($sql, [ $bibid, $shortnames, $url ]);
+}
+
+# Fetch (id, marc) for every non-deleted bib that carries a ##URI##
+# call number and whose MARC contains a subfield 7 matching
+# $importBIBTagName. Returns a reference to an array of
+# [id, marcxml] pairs.
+sub getRelatedBibList
+{
+    my $sql = "
+ SELECT id,marc FROM
+ biblio.record_entry WHERE
+ deleted IS FALSE AND
+ id IN(SELECT record FROM asset.call_number WHERE label=\$\$##URI##\$\$)
+ AND marc ~ '<subfield code=\"7\">$importBIBTagName'
+ ";
+    logfile_addLine($log, $sql);
+    my @pairs;
+    foreach my $row (@{ dbhandler_query($sql) })
+    {
+        push @pairs, [ $row->[0], $row->[1] ];
+    }
+    return \@pairs;
+}
+
+# Decide whether an 856 field belongs to this import source.
+# Rejects excerpt links (any subfield 3 equal to 'excerpt',
+# case-insensitively), fields whose second indicator is not '0', and
+# fields lacking a subfield 7 equal to $importBIBTagName.
+# Returns 1 when the field is "ours", 0 otherwise.
+sub decideRelated856
+{
+    my $field = shift;
+    return 0 if grep { lc($_) eq 'excerpt' } $field->subfield('3');
+    return 0 if $field->indicator(2) ne '0';
+    # An empty subfield-7 list and a list with no matching value both fail here.
+    return 0 unless grep { $_ eq $importBIBTagName } $field->subfield('7');
+    return 1;
+}
+
+# Ensure each related 856 field carries one $9 per configured
+# participant shortname, and delete $9s whose value is not in
+# @shortnames. Takes and returns a MARC::Record.
+sub sync9s
+{
+ my $marc = shift;
+ my @recID = $marc->field('856');
+ if(@recID)
+ {
+ #$marc->delete_fields( @recID );
+ for my $rec(0..$#recID)
+ {
+ #print Dumper(@recID[$rec]);
+ my @recordshortnames=();
+ my $isRelated = decideRelated856(@recID[$rec]);
+ if($isRelated)
+ {
+ my $thisField = @recID[$rec];
+ my @ninposes;
+ my $poses=0;
+ #deleting subfields requires knowledge of what position among all of the subfields they reside.
+ #so we have to record at what positions each of the 9's are ahead of time.
+ foreach($thisField->subfields())
+ {
+ my @f = @{$_};
+ if(@f[0] eq '9')
+ {
+ push (@ninposes, $poses);
+ }
+ $poses++;
+ }
+ my @nines = $thisField->subfield("9");
+ my @delete9s = ();
+
+ # Add a $9 for each configured shortname that is missing.
+ # NOTE(review): @recordshortnames is re-appended on every pass of
+ # this loop (and grows as $9s are added), so it can contain
+ # duplicates; the deletion index walk below assumes it parallels
+ # @ninposes — verify against real data.
+ for my $t(0.. $#shortnames)
+ {
+ my @s7 = @recID[$rec]->subfield( '7' );
+
+ my @subfields = @recID[$rec]->subfield( '9' );
+ my $shortnameexists=0;
+ for my $subs(0..$#subfields)
+ {
+ #print "Comparing ".@subfields[$subs]. " to ".@shortnames[$t]."\n";
+ push @recordshortnames, @subfields[$subs];
+ if(@subfields[$subs] eq @shortnames[$t])
+ {
+ $shortnameexists=1;
+ }
+ }
+ #print "shortname exists: $shortnameexists\n";
+ if(!$shortnameexists)
+ {
+ #print "adding ".@shortnames[$t]."\n";
+ @recID[$rec]->add_subfields('9'=>@shortnames[$t]);
+ }
+ }
+ ## clean up 9's that are not in the list
+ my $ninePos = 0;
+ for my $recshortname(0.. $#recordshortnames)
+ {
+ my $thisname = @recordshortnames[$recshortname];
+ my $foundshortname=0;
+ foreach(@shortnames)
+ {
+ if($_ eq $thisname)
+ {
+ $foundshortname=1;
+ }
+ }
+ if(!$foundshortname)
+ {
+ # Mark the pre-recorded subfield position for deletion.
+ push(@delete9s, @ninposes[$ninePos]);
+ }
+ $ninePos++;
+ }
+ if($#delete9s > -1)
+ {
+ @recID[$rec]->delete_subfield(code => '9', 'pos' => \@delete9s);
+ }
+
+ }
+ }
+ }
+ return $marc;
+}
+
+# Emit a records-per-second progress line every $displayInterval
+# records and restart the timing window every $recalcTimeInterval
+# records so the rate reflects recent throughput. Returns the
+# (possibly reset) record count and start time.
+sub displayRecPerSec
+{
+    my ($label, $recCount, $startTime, $displayInterval,
+        $recalcTimeInterval, $totalCount, $totalRows) = @_;
+    $totalRows++; # result-set size arrives 0-based
+
+    if ($recCount % $displayInterval == 0)
+    {
+        my $rate = calcRecPerSec($recCount, $startTime);
+        print "$label : $rate Records / Second $totalCount / $totalRows\n";
+    }
+
+    # Periodically hand back a zeroed counter and a fresh start time.
+    return (0, DateTime->now(time_zone => "local"))
+        if ($recCount % $recalcTimeInterval == 0);
+
+    return ($recCount, $startTime);
+}
+
+# Average records/second since $startTime, padded to a fixed
+# 15-character column by mobutil_makeEvenWidth. The elapsed
+# DateTime::Duration is rendered via DateTime::Format::Duration and
+# re-split into day/hour/minute/second parts; totals under one second
+# are clamped to 1 to avoid division by zero.
+sub calcRecPerSec
+{
+    my ($recCount, $startTime) = @_;
+
+    my $elapsed = DateTime->now(time_zone => "local") - $startTime;
+    my $formatter = DateTime::Format::Duration->new(pattern => '%d %H %M %S');
+    my ($days, $hours, $minutes, $seconds) =
+        split(/\s/, $formatter->format_duration($elapsed));
+
+    my $totalSeconds = ($days * 24 * 60 * 60)
+                     + ($hours * 60 * 60)
+                     + ($minutes * 60)
+                     + $seconds;
+    $totalSeconds = 1 if $totalSeconds < 1;
+
+    return mobutil_makeEvenWidth(substr($recCount / $totalSeconds, 0, 7), 15);
+}
+
+# Assemble the emailed summary report for the current job: per-file
+# status counts, grand totals, "interesting" import/removal rows,
+# DB-side result counts, and authority-batch details. Returns the
+# report text.
+sub runReports
+{
+ ## Reporting
+ my $ret = "";
+ my %totals = ();
+ my %grandTotals = ();
+ my @sortStatus = ();
+ my $grandTotalNum = 0;
+
+ ### Overall File Totals
+ my $query = "
+ select filename,status,count(*)
+ from
+ bib_magic.import_status
+ where
+ job = $jobid
+ group by 1,2
+ order by 1,2";
+
+ my @results = @{dbhandler_query($query)};
+ my $currFile = "";
+ my $currFileTotal = 0;
+
+ # Rows arrive ordered by filename; emit a section each time the
+ # filename changes, then a final section after the loop.
+ foreach(@results)
+ {
+ my @row = @{$_};
+ if(@row[0] ne $currFile)
+ {
+ if( ($currFile ne '') && ($totals{$currFile}) )
+ {
+ $ret .= "\r\n\r\n--- $currFile ---\r\n";
+ $ret .= "\t$currFileTotal Total record(s)\r\n";
+ my @a = @{$totals{$currFile}};
+ $ret .= "\t$_\r\n" foreach(@a);
+ $ret .= "\r\n";
+ }
+ $currFileTotal = 0;
+ $currFile = @row[0];
+ my @a = ();
+ $totals{$currFile} = \@a;
+ push(@sortStatus, @row[1]) if(!($grandTotals{@row[1]}));
+ }
+ $currFileTotal += @row[2];
+ my @a = @{$totals{$currFile}};
+ push (@a, @row[2] . " " . @row[1]);
+ $totals{$currFile} = \@a;
+
+ $grandTotals{@row[1]} = 0 if(!($grandTotals{@row[1]}));
+ $grandTotals{@row[1]} += @row[2];
+ $grandTotalNum += @row[2];
+ }
+ undef @results;
+
+ # Report the last loop
+ $ret .= "\r\n\r\n--- $currFile ---\r\n";
+ $ret .= "\t$currFileTotal Total record(s)\r\n";
+ my @a = @{$totals{$currFile}};
+ $ret .= "\t$_\r\n" foreach(@a);
+ $ret .= "\r\n";
+ undef @a;
+
+ @sortStatus = sort @sortStatus;
+
+ if($grandTotalNum > 0)
+ {
+ $ret .= "--- Grand Total ---\r\n";
+ $ret .= "$grandTotalNum Total\r\n";
+ $ret .= $grandTotals{$_} . " $_\r\n" foreach(@sortStatus);
+ $ret .= "\r\n\r\n\r\n";
+ }
+
+ ### Import summary
+ $query = "
+ select
+ z01,title,status,bib
+ from
+ bib_magic.import_status
+ where
+ job = $jobid and
+ type = \$\$importbib\$\$ and
+ status not in ('inserted','matched and overlayed')";
+
+ $ret .= reportSummaryChunk("Interesting Imports",$query);
+
+ ### Removal summary
+ $query = "
+ select
+ z01,title,status,bib
+ from
+ bib_magic.import_status
+ where
+ job = $jobid and
+ type ~ \$\$remov\$\$ and
+ status not in ('removed bib','removed related 856','No matching bib in DB')";
+
+ $ret .= reportSummaryChunk("Interesting Removals",$query);
+
+ $ret .= gatherOutputReport();
+
+ # Authority reports
+ # NOTE(review): this query selects marc_xml but the value is used as a
+ # batch id for auth_load tables below — confirm what importauth rows
+ # actually store in that column.
+ $query = "select marc_xml from bib_magic.import_status where type=\$\$importauth\$\$ and job=$jobid";
+ updateJob("Processing",$query);
+ my @results = @{dbhandler_query($query)};
+ foreach(@results)
+ {
+ my @row = @{$_};
+ my $batchID = @row[0];
+ # For some reason, eg_staged_bib_overlay prefixes the tables with auths_
+ $batchID = "auths_$batchID";
+ my $interestingImports = "";
+
+ # Gather up the new authority bibs with heading
+ my $query = "
+ select aaa.auth_id,(select left(string_agg(ash.value,', ' ),20) from authority.simple_heading ash where ash.record=aaa.auth_id) from
+ auth_load.$batchID aaa
+ where
+ aaa.auth_id is not null and
+ aaa.imported
+
+ union all
+
+ select aaa.new_auth_id,(select left(string_agg(ash.value,', ' ),20) from authority.simple_heading ash where ash.record=aaa.new_auth_id) from
+ auth_load.$batchID aaa
+ where
+ aaa.new_auth_id is not null and
+ aaa.imported
+ ";
+ logfile_addLogLine($log,$query);
+ my @resultss = @{dbhandler_query($query)};
+ foreach(@resultss)
+ {
+ my @row = @{$_};
+ my $id = @row[0];
+ my $heading = @row[1];
+ # strip non-printable characters from the heading
+ $heading =~ tr/\x20-\x7f//cd;
+ $interestingImports .= $id . " '$heading'\r\n";
+ }
+
+ # Gather up the non imported authority bibs with heading
+ my $query = "select auth_id||' '||new_auth_id||' '||cancelled_auth_id,heading from auth_load.$batchID where not imported";
+ logfile_addLogLine($log,$query);
+ my @resultss = @{dbhandler_query($query)};
+ foreach(@resultss)
+ {
+ my @row = @{$_};
+ logfile_addLine($log, join(';', @row) );
+ my $id = @row[0];
+ my $heading = @row[1];
+ $interestingImports .= "not worked - " . $id . " - '$heading'\r\n";
+ }
+ $interestingImports = truncateOutput($interestingImports, 5000);
+ $ret .= "#### Authority Batch $batchID ####\r\n$interestingImports" if ( length($interestingImports) > 0);
+ }
+
+
+ return $ret;
+
+}
+
+# Build one "#### title ####" report section from a query returning
+# (z01, title, status, bib) rows. Rows are grouped by status; each
+# status gets a count line followed by its per-record details,
+# truncated at 5000 characters. Returns '' when the query yields no
+# rows.
+sub reportSummaryChunk
+{
+    my $title = shift;
+    my $query = shift;
+    my $ret = "";
+
+    my @results = @{dbhandler_query($query)};
+    return $ret if $#results < 0;
+
+    # Bucket rows by status: status => [ [z01, title], ... ]
+    my %status = ();
+    foreach (@results)
+    {
+        my @row = @{$_};
+        $row[1] =~ tr/\x20-\x7f//cd;    # strip non-printable chars from titles
+        push @{ $status{$row[2]} }, [ $row[0], $row[1] ];
+    }
+
+    $ret .= "#### $title ####\r\n";
+    while ( (my $key, my $value) = each(%status) )
+    {
+        my @records = @{$value};
+        my $count = scalar @records;
+        $ret .= "$key: $count time(s), record details:\r\n";
+        # BUGFIX: the detail buffer is now rebuilt per status. It
+        # previously accumulated across the whole loop, so every
+        # section repeated all earlier sections' rows and the 5000-char
+        # truncation compounded.
+        my $details = "";
+        $details .= $key . " - '" . $_->[0] . "' '" . $_->[1] . "'\r\n" foreach (@records);
+        $ret .= truncateOutput($details, 5000) . "\r\n\r\n";
+    }
+    return $ret;
+}
+
+# Parse each downloaded file and stage its records as rows in
+# bib_magic.import_status for later processing. MARC files (binary or
+# XML, chosen by extension) are read record-by-record; .csv/.tsv files
+# are treated as ISBN removal lists (10- or 13-digit tokens). Rows are
+# batched to the DB via dumpRowsIfFull(). Returns 1 when at least one
+# row was staged, 0 otherwise.
+sub prepFiles
+{
+ my @files = @{@_[0]};
+ my $dbValPos = 1;
+ my $ret = 0;
+ my $insertTop = "INSERT INTO bib_magic.import_status(filename,bibtag,z01,title,sha1,type,marc_xml,job)
+ VALUES\n";
+ my @vals = ();
+ my $dbInserts = $insertTop;
+ my $rowCount=0;
+
+ for my $b(0..$#files)
+ {
+ my $thisfilename = lc($files[$b]);
+ my $filenameForDB = $files[$b];
+ updateJob("Processing","Parsing: $archivefolder/".$files[$b]);
+ if(! ( ($thisfilename =~ m/\.csv/) || ($thisfilename =~ m/\.tsv/) ) )
+ {
+ # MARC path: pick the reader by file extension.
+ my @fsp = split('\.',$thisfilename);
+ my $fExtension = pop @fsp;
+ $fExtension = lc $fExtension;
+ my $file;
+ # BUGFIX: was '$fExtension !=~ m/xml/', which Perl parses as a
+ # numeric '!=' against a bit-complemented match on $_ — not a
+ # negated binding. '!~' is the intended operator.
+ $file = MARC::File::USMARC->in("$archivefolder/".$files[$b]) if $fExtension !~ m/xml/;
+ $file = MARC::File::XML->in("$archivefolder/".$files[$b]) if $fExtension =~ m/xml/;
+ my $isRemoval = compareStringToArray($thisfilename,$conf{'removalfiles'});
+ my $isAuthority = compareStringToArray($thisfilename,$conf{'authorityfiles'});
+ my $endOfFile = 0;
+ while(!$endOfFile)
+ {
+ local $@;
+ eval # sometimes $file->next() will bomb. We need to skip a bad record
+ {
+ while ( my $marc = $file->next() )
+ {
+ $dbInserts.="(";
+ $marc = add9($marc) if ( !$isRemoval && !$conf{'import_as_is'} );
+ my $importType = "importbib";
+ $importType = "removal" if $isRemoval;
+ $importType = "importauth" if $isAuthority;
+ my $z01 = getsubfield($marc,'001','');
+ my $t = getsubfield($marc,'245','a');
+ my $sha1 = calcSHA1($marc);
+ $sha1 .= ' '.calcSHA1($marc, 1); # append the baby SHA
+ my $thisXML = convertMARCtoXML($marc);
+ # One placeholder per column, in the order of $insertTop.
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$filenameForDB);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$importBIBTagNameDB);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$z01);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$t);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$sha1);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$importType);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$thisXML);
+ $dbInserts.="\$$dbValPos";
+ $dbValPos++;
+ push(@vals,$jobid);
+ $dbInserts.="),\n";
+ $rowCount++;
+ ($dbInserts, $dbValPos, @vals) = dumpRowsIfFull($insertTop, $dbInserts, $dbValPos, \@vals);
+ $ret = 1;
+ last if $isAuthority; # Authority loads via external script and just needs the file name
+ }
+ $endOfFile = 1;
+ 1;
+ } or do
+ {
+ # A record blew up in $file->next(); log it and resume the
+ # read loop on the next record.
+ $rowCount++;
+ logfile_addLine($log,"$thisfilename:BAD RECORD $rowCount");
+ print "$thisfilename:BAD RECORD $rowCount\n" if $debug;
+ };
+ }
+ $file->close();
+ undef $file;
+ }
+ else
+ {
+ # ISBN-list path: sniff the delimiter (comma vs tab), then stage
+ # every 10- or 13-digit token as an isbn_remove row.
+ my $tfile = $archivefolder."/".$files[$b];
+ my @lines = @{logfile_readFile($tfile)};
+ my $commas = 0;
+ my $tabs = 0;
+ foreach(@lines)
+ {
+ my @split = split(/,/,$_);
+ $commas+=$#split;
+ @split = split(/\t/,$_);
+ $tabs+=$#split;
+ }
+ my $delimiter = $commas > $tabs ? "," : "\t";
+ foreach(@lines)
+ {
+ my $fullLine = $_;
+ my @split = split(/$delimiter/,$_);
+ foreach(@split)
+ {
+ my $ent = mobutil_trim($_);
+ $ent =~ s/\D//g;
+ if( ( length($ent) == 13 ) or ( length($ent) == 10 ) )
+ {
+ $dbInserts.="(";
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$filenameForDB);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$importBIBTagNameDB);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$ent);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$ent);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$ent);
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,"isbn_remove");
+ $dbInserts.="\$$dbValPos,";
+ $dbValPos++;
+ push(@vals,$fullLine);
+ $dbInserts.="\$$dbValPos";
+ $dbValPos++;
+ push(@vals,$jobid);
+ $dbInserts.="),\n";
+ $rowCount++;
+ ($dbInserts, $dbValPos, @vals) = dumpRowsIfFull($insertTop, $dbInserts, $dbValPos, \@vals);
+ $ret = 1;
+ }
+ }
+ }
+ }
+ }
+ dumpRowsIfFull($insertTop, $dbInserts, $dbValPos, \@vals, 1) if $dbValPos > 1; # Dump what's left into the DB
+
+ return $ret;
+}
+
+# Flush the accumulated multi-row INSERT to the database once more
+# than 5000 placeholder values have built up, or unconditionally when
+# $dumpAnyway is set. Returns (statement, next placeholder number,
+# remaining values): reset to ($insertTop, 1) after a flush, unchanged
+# otherwise.
+sub dumpRowsIfFull
+{
+    my $insertTop = $_[0];
+    my $dbInserts = $_[1];
+    my $count = $_[2];
+    my @vals = @{$_[3]};
+    my $dumpAnyway = $_[4] || 0;
+
+    # Not full yet and not forced: hand everything back untouched.
+    return ($dbInserts, $count, @vals) unless ( ($count > 5000) || $dumpAnyway );
+
+    $dbInserts = substr($dbInserts,0,-2); #chopping off [,\n]
+    my $readyForStatusUpdate = $debug ? $dbInserts : '';
+    if($debug) #slow slow slow - only debug mode
+    {
+        # Substitute (truncated) literal values for the $N placeholders
+        # so the job-status table shows what is being inserted.
+        # BUGFIX: was '$readyForStatusUpdate = "\n";', which clobbered
+        # the statement and made every substitution below a no-op.
+        $readyForStatusUpdate .= "\n";
+        my $i = 1;
+        foreach(@vals)
+        {
+            my $temp = $_;
+            $temp = substr($temp,0,100) if(length($temp) > 100);
+            $readyForStatusUpdate =~ s/\$$i([,\)])/\$data\$$temp\$data\$$1/;
+            $i++;
+        }
+    }
+    updateJob("Processing","Dumping memory to DB $count $readyForStatusUpdate");
+    logfile_addLine($log,"Final insert statement:\n$dbInserts") if $debug;
+    print "Dropping off in DB\n" if $debug;
+    dbhandler_updateWithParameters($dbInserts,\@vals);
+    print "Just got back from insert\n" if $debug;
+
+    # Reset the accumulator for the caller. (The original re-declared
+    # $dbInserts/@vals with 'my' in the same scope, shadowing the
+    # existing lexicals.)
+    return ($insertTop, 1);
+}
+
+# Reset every row of a previous job back to status 'new' so it can be
+# re-processed. Returns the job's row count (0 when the job has no
+# rows, in which case nothing is updated).
+# NOTE: historically this ignored its argument and used the global
+# $jobid; the lone caller assigns $jobid = $reprocess first, so using
+# the parameter (with a global fallback) is behaviorally equivalent.
+sub resetJob
+{
+    my $resetJob = shift;
+    $resetJob = $jobid if !defined $resetJob;
+    my $ret = 0;
+    my $query = "select count(*) from bib_magic.import_status where job=$resetJob";
+    my @results = @{dbhandler_query($query)};
+    foreach(@results)
+    {
+        my @row = @{$_};
+        $ret = $row[0];
+    }
+    if($ret)
+    {
+        $query = "update bib_magic.import_status set status=\$1 , processed = false , bib = null , row_change_time = now() where job= \$2";
+        updateJob("Processing","--Reset Job - \n$query");
+        my @vals = ('new',$resetJob);
+        dbhandler_updateWithParameters($query,\@vals);
+    }
+
+    return $ret;
+}
+
+# Email an acknowledgement that the listed files were received and
+# processing has started, with an extra slowness warning when deep
+# match searching is enabled.
+sub sendWelcomeMessage
+{
+ my @files = @{$_[0]};
+ my $files = join("\r\n",@files);
+ my @tolist = ($conf{"alwaysemail"});
+ my $body =
+"Hello,
+
+Just letting you know that I have begun processing the provided files:
+$files";
+ $body .= "\r\n
+This software is configured to perform deep search matches against the database. This is slow but thorough.
+Depending on the number of records, it could be days before you receive the finished message. FYI." if($searchDeepMatch);
+
+ $body .= "
+I'll send a follow-up email when I'm done.
+
+Yours Truly,
+The friendly server
+";
+
+ logfile_addLine($log,"Sending Welcome message:\r\n$body");
+
+ my $email = email_setup($conf{"fromemail"},\@tolist,1,1,\%conf);
+ email_send($email,"Evergreen Electronic Import Summary - $importBIBTagName Import Report Job # $jobid WINDING UP", $body);
+
+}
+
+# Email a failure notice for the current job to the always-notify
+# address.
+sub alertErrorEmail
+{
+    my $error = shift;
+    my @recipients = ($conf{"alwaysemail"});
+    my $mail = email_setup($conf{"fromemail"}, \@recipients, 1, 0, \%conf);
+    email_send(
+        $mail,
+        "Evergreen Utility - $importBIBTagName Import Report Job # $jobid - ERROR",
+        "$error\r\n\r\n-Evergreen Perl Squad-");
+}
+
+# Cap $text at $limit characters, appending a marker when a cut was
+# made. Text at or under the limit is returned unchanged.
+sub truncateOutput
+{
+    my ($text, $limit) = @_;
+    return $text if length($text) <= $limit;
+    return substr($text, 0, $limit) . "\nTRUNCATED FOR LENGTH\n\n";
+}
+
+# Summarize this job's database-side results: counts of new and
+# updated bibs, merged record pairs, and item reassignments. Returns
+# the report text ('' when nothing happened).
+sub gatherOutputReport
+{
+    my $ret = "";
+    my $newRecordCount = 0;
+    my $updatedRecordCount = 0;
+    my $mergedRecords = '';
+    my $itemsAssignedRecords = '';
+
+    # New bibs created by this job
+    my $query = "select count(*) from bib_magic.bib_marc_update where job=$jobid and new_record is true";
+    my @results = @{dbhandler_query($query)};
+    $newRecordCount = $_->[0] foreach(@results);
+
+    # Existing bibs overlaid/updated by this job
+    $query = "select count(*) from bib_magic.bib_marc_update where job=$jobid and new_record is not true";
+    @results = @{dbhandler_query($query)};
+    $updatedRecordCount = $_->[0] foreach(@results);
+
+    # Merged pairs: lead bib absorbed sub bib
+    $query = "select leadbib,subbib from bib_magic.bib_merge where job=$jobid";
+    @results = @{dbhandler_query($query)};
+    my $count = 0;
+    foreach(@results)
+    {
+        $mergedRecords .= $_->[0] . " < " . $_->[1] . "\r\n";
+        $count++;
+    }
+    if($count > 0)
+    {
+        $mergedRecords = truncateOutput($mergedRecords,5000);
+        $mergedRecords = "$count records were merged - The left number is the winner\r\n" . $mergedRecords;
+        # BUGFIX: was a bare '.' concatenation whose result was
+        # discarded, so the trailing spacing never appeared.
+        $mergedRecords .= "\r\n\r\n\r\n";
+    }
+
+    # Item reassignments: physical items moved onto the target bib
+    $query = "select target_bib,prev_bib from bib_magic.item_reassignment where job=$jobid";
+    @results = @{dbhandler_query($query)};
+    $count = 0;
+    foreach(@results)
+    {
+        $itemsAssignedRecords .= $_->[0] . " < " . $_->[1] . "\r\n";
+        $count++;
+    }
+    if($count > 0)
+    {
+        $itemsAssignedRecords = truncateOutput($itemsAssignedRecords,5000);
+        $itemsAssignedRecords = "$count Records had physical items assigned - The left number is where the items were moved\r\n" . $itemsAssignedRecords;
+        # BUGFIX: same discarded-concatenation no-op as above.
+        $itemsAssignedRecords .= "\r\n\r\n\r\n";
+    }
+
+    $ret .= $newRecordCount." New record(s) were created.\r\n\r\n\r\n" if($newRecordCount > 0);
+    $ret .= $updatedRecordCount." Record(s) were updated\r\n\r\n\r\n" if($updatedRecordCount > 0);
+    $ret .= $mergedRecords if ($mergedRecords ne '');
+    $ret .= $itemsAssignedRecords if ($itemsAssignedRecords ne '');
+
+    return $ret;
+}
+
sub deleteFiles
{
    # Delete each listed file from the archive folder, logging every removal.
    # Arg: ref to a list of file names relative to $archivefolder.
    my ($fileListRef) = @_;
    foreach my $name (@{$fileListRef})
    {
        my $fullPath = "$archivefolder/$name";
        logfile_addLogLine($log, "Deleting $name");
        logfile_deleteFile($fullPath);
    }
}
+
sub parseFileName
{
    # Split a full path into (directory path, base name without extension,
    # extension), e.g. "/a/b/rep.2020.mrc" -> ("/a/b/", "rep.2020", "mrc").
    # The base name is re-encoded as CP1252, matching how archived file names
    # are compared elsewhere in this script.
    # Returns a ref to the three-element list.
    # Fix: replaces @_[0]/@sp[...] scalar-slice idiom with proper unpacking.
    my ($fullPath) = @_;

    my @parts = split('/', $fullPath);
    my $leaf = $parts[$#parts];
    # Directory portion = everything before the leaf file name.
    my $path = substr($fullPath, 0, length($leaf) * -1);

    my @pieces = split('\.', $leaf);
    my $extension = pop @pieces;
    my $baseFileName = join('.', @pieces);
    $baseFileName = Encode::encode("CP1252", $baseFileName);

    my @ret = ($path, $baseFileName, $extension);
    return \@ret;
}
+
sub moveFile
{
    # Move $file to $destination using copy-then-delete semantics.
    # Returns 1 on success, 0 on failure.
    my $file = shift;
    my $destination = shift;

    if (!logfile_copyFile($file, $destination))
    {
        # BUG FIX: the original fell through and returned 1 even when the
        # copy failed, so callers treated a failed move as successful.
        # (Assumes logfile_copyFile returns truth on success, which the
        # original's "if(logfile_copyFile(...))" already relied on.)
        return 0;
    }
    if (!unlink($file))
    {
        print "Unable to delete $file";
        return 0;
    }
    return 1;
}
+
sub getmarc
{
    # Dispatch to the record fetcher named by conf "recordsource"
    # (folder | ftp | cloudlibrary | marcivehttps); log and exit on anything
    # else. Returns the chosen fetcher's list ref of file names.
    my %fetchers = (
        folder       => \&getmarcFromFolder,
        ftp          => \&getmarcFromFTP,
        cloudlibrary => \&getMarcFromCloudlibrary,
        marcivehttps => \&getMarcFromMarciveHTTPS,
    );
    my $source = lc $conf{"recordsource"};
    if (!exists $fetchers{$source})
    {
        logfile_addLogLine($log, "Unsupported external source: " . $source);
        exit;
    }
    my @ret = @{ $fetchers{$source}->() };
    return \@ret;
}
+
sub getmarcFromFolder
{
    # Collect new MARC files from the configured local incoming folder and
    # move them into $archivefolder. Files already archived at the same byte
    # size are treated as duplicates and discarded. Returns a ref to the list
    # of leaf file names that were actually moved.
    my $incomingfolder = $conf{'incomingmarcfolder'};

    my @ret = ();
    my @files;
    #Get all files in the directory path
    @files = @{dirtrav(\@files,$incomingfolder)};

    foreach(@files)
    {
        my $filename = $_;
        my @filePathParse = @{parseFileName($filename)};
        # Rebuild "base.ext" so only the leaf name is used from here on.
        $filename = @filePathParse[1].'.'.@filePathParse[2];
        my $download = decideToDownload($filename);

        if($download)
        {
            if(-e "$archivefolder/$filename")
            {
                # Already archived once: compare byte sizes to decide whether
                # the incoming copy is genuinely different.
                # NOTE(review): assumes the file sits directly under
                # $incomingfolder even though dirtrav may return nested paths
                # — verify with dirtrav's behavior.
                my $size = stat("$archivefolder/$filename")->size; #[7];
                my $rsize = stat("$incomingfolder/$filename")->size;
                logfile_addLogLine($log,"$filename Local: $size Remote: $rsize");
                if($size ne $rsize)
                {
                    # Sizes differ: drop the stale archive copy and re-import.
                    logfile_addLine($log,"$archivefolder/$filename differes in size remote $filename");
                    unlink("$archivefolder/$filename");
                }
                else
                {
                    # Same size: duplicate; discard the incoming file.
                    logfile_addLine($log,"skipping $filename");
                    unlink(@filePathParse[0].'/'.$filename);
                    $download=0;
                }
            }
            else
            {
                logfile_addLogLine($log,"NEW $filename");
            }
            if($download)
            {
                # Move (copy + delete) into the archive; only report files
                # that actually made it.
                my $worked = moveFile(@filePathParse[0].'/'.$filename,"$archivefolder/$filename");
                if($worked)
                {
                    push (@ret, $filename);
                }
            }
        }
    }
    logfile_addLine($log,Dumper(\@ret));
    return \@ret;
}
+
sub getmarcFromFTP
{
    # Fetch candidate MARC files from the configured FTP server into
    # $archivefolder, skipping files we already hold at a matching size.
    # Returns a ref to the list of server-relative paths downloaded.
    # NOTE(review): relies on Net::FTP, which does not appear in the visible
    # top-of-file use list — confirm it is loaded elsewhere.
    my $server = $conf{'server'};
    # Accept config values that include a scheme prefix.
    $server=~ s/http:\/\///gi;
    $server=~ s/ftp:\/\///gi;

    my $loops=0;
    my $login = $conf{'login'};
    my $password = $conf{'password'};
    my @ret = ();

    # NOTE(review): this logs the FTP password in clear text.
    logfile_addLogLine($log,"**********FTP starting -> $server with $login and $password");

    my $ftp = Net::FTP->new($server, Debug => 0, Passive=> 1)
    or die logfile_addLogLine($log,"Cannot connect to ".$server);
    $ftp->login($login,$password)
    or die logfile_addLogLine($log,"Cannot login ".$ftp->message);
    $ftp->cwd($remotefolder);
    # Recursively gather candidate file paths from the remote tree.
    my @interestingFiles = ();
    @interestingFiles = @{ftpRecurse($ftp, \@interestingFiles)};
    logfile_addLine($log,Dumper(\@interestingFiles)) if $debug;
    foreach(@interestingFiles)
    {
        my $filename = $_;
        my $download = decideToDownload($filename);

        if($download)
        {
            if(-e "$archivefolder/"."$filename")
            {
                # Already archived: compare sizes to detect a changed remote file.
                my $size = stat("$archivefolder/"."$filename")->size; #[7];
                my $rsize = findFTPRemoteFileSize($filename, $ftp, $size);
                if($size ne $rsize)
                {
                    logfile_addLine($log,"Local: $size") if $debug;
                    logfile_addLine($log,"remot: $rsize") if $debug;
                    logfile_addLine($log,"$archivefolder/"."$filename differes in size");
                    unlink("$archivefolder/"."$filename");
                }
                else
                {
                    logfile_addLine($log,"skipping $filename");
                    $download=0;
                }
            }
            else
            {
                logfile_addLine($log,"NEW $filename");
            }
            if($download)
            {
                # Mirror the remote directory structure under the archive folder.
                my $path = $archivefolder."/".$filename;
                $path = substr($path,0,rindex($path,'/'));
                # logfile_addLine($log,"Path = $path");
                if(!-d $path)
                {
                    logfile_addLine($log,"$path doesnt exist - creating directory");
                    make_path($path, {
                    verbose => 0,
                    mode => 0755,
                    });
                }
                my $worked = $ftp->get($filename,"$archivefolder/$filename");
                if($worked)
                {
                    push (@ret, "$filename");
                }
            }
        }
    }
    $ftp->quit
    or die logfile_addLogLine($log,"Unable to close FTP connection");
    logfile_addLogLine($log,"**********FTP session closed ***************");
    logfile_addLine($log,Dumper(\@ret));
    return \@ret;
}
+
sub findFTPRemoteFileSize
{
    # Best-effort determination of a remote file's size in bytes.
    # Tries $ftp->size first; when that yields something non-numeric or
    # implausibly small (fewer than 4 digits, i.e. < 1 KB), falls back to
    # parsing the long directory listing ($ftp->dir) for a byte-count column.
    # $localFileSize is used to accept a near-match (> 98%), tolerating
    # ASCII-vs-UTF-8 byte count drift.
    my $filename = shift;
    my $ftp = shift;
    my $localFileSize = shift;

    my $rsize = $ftp->size($filename);

    my $testUsingDirMethod = 0;
    $testUsingDirMethod = 1 if($rsize =~ m/\D/);
    $testUsingDirMethod = 1 if( (!$testUsingDirMethod) && (length($rsize) <4) );
    if($testUsingDirMethod)
    {
        my @rsizes = $ftp->dir($filename);
        $rsize = @rsizes[0] ? @rsizes[0] : '0';
        #remove the filename from the string
        my $rfile = $filename;
        # parenthesis and slashes in the filename screw up the regex
        $rfile =~ s/\(/\\(/g;
        $rfile =~ s/\)/\\)/g;
        $rfile =~ s/\//\\\//g;
        $rsize =~ s/$rfile//g;
        logfile_addLine($log,$rsize);
        my @split = split(/\s+/, $rsize);

        # Build the year string: a window of +/- 3 years around today, used
        # below to reject listing columns that are a year rather than a size.
        my $dt = DateTime->now(time_zone => "local");
        my $fdate = $dt->ymd;
        my $year = substr($fdate,0,4);
        my @years = ();
        my $i = 0;
        push @years, $year-- while($i++ < 3);
        $year = substr($fdate,0,4);
        $i = 0;
        push @years, $year++ while($i++ < 3);
        $year = '';
        $year.="$_ " foreach(@years);

        foreach(@split)
        {
            # Looking for something that looks like a filesize in bytes. Example output from ftp server:
            # -rwxr-x--- 1 northkcm System 15211006 Apr 25 2019 Audio_Recorded Books eAudio Adult Subscription_4_25_2019.mrc
            # -rwxr-x--- 1 scenicre System 9731 Apr 09 2018 Zinio_scenicregionalmo_2099_Magazine_12_1_2017.mrc
            # We can expect a file that contains a single marc record to be reasonable in size ( > 1k)
            # Therefore, we need to find a string of at least 4 numeric characters. Need to watch out for "year" numerics

            next if($_ =~ m/\D/); #ignore fields containing anything other than numbers
            next if index($year,$_) > -1; #ignore fields containing a year value close to current year

            if(length($_) > 3)
            {
                $rsize = $_;
                # if we find that one of the values exactly matches local file size, then we just set it to that
                last if($localFileSize eq $_);

                # Need to allow for a small filesize margin to compensate for the ASCII byte count versus UTF-8
                # This isn't exactly perfect, but I'm going with a margin of 98 percent
                my $lowerNumber = $localFileSize;
                my $higherNumber = $_;
                $lowerNumber = $_ if $_ < $localFileSize;
                $higherNumber = $localFileSize if $_ < $localFileSize;
                my $percent = ($lowerNumber / $higherNumber) * 100;
                logfile_addLine($log,"Filesize percentage: $percent") if $debug;
                if($percent > 98)
                {
                    # Make them equal
                    $rsize = $localFileSize;
                    last;
                }
            }
        }
    }
    return $rsize;
}
+
sub getMarcFromCloudlibrary
{
    # Pull new/updated MARC records from the cloudLibrary API since the last
    # recorded run date, and — at most roughly every 30 days — check existing
    # cloudLibrary bibs against the vendor to find titles that disappeared.
    # New records and removal records are written to separate XML files in
    # $archivefolder; returns a ref to the list of archive-relative names.
    my $startDate = '0001-01-01';
    my $lastRemovalDate = '0001-01-01';
    if( -e $lastDateRunFilePath)
    {
        my $previousRunDateTimeFile = $lastDateRunFilePath;
        my @previousRunTime = @{logfile_readFile($previousRunDateTimeFile)};

        # It's always going to be the first line in the file
        my $previousRunTime = @previousRunTime[0];
        $previousRunTime =~ s/\n$//g;
        # Second line records when deletions were last processed.
        my $lastRemovalRunTime = @previousRunTime[1];
        $lastRemovalRunTime =~ s/\n$//g;
        logfile_addLine($log,"reading last run file and got $previousRunTime and $lastRemovalRunTime");
        my ($y,$m,$d) = $previousRunTime =~ /^([0-9]{4})\-([0-9]{2})\-([0-9]{2})\z/
        or die;
        $startDate = $y.'-'.$m.'-'.$d;
        # NOTE(review): re-declaring ($y,$m,$d) with "my" in the same scope
        # masks the earlier variables (warning under "use warnings").
        my ($y,$m,$d) = $lastRemovalRunTime =~ /^([0-9]{4})\-([0-9]{2})\-([0-9]{2})\z/
        or die;
        $lastRemovalDate = $y.'-'.$m.'-'.$d;
    }

    my $dateNow = DateTime->now(time_zone => "GMT");

    my $endDate = $dateNow->ymd();

    my @newRecords = @{_getMarcFromCloudlibrary($startDate, $endDate)};
    # Done gathering up new records.

    # Decide if it's been long enough to check for deletes
    # NOTE(review): $dateNow is re-declared with "my" here, masking the one above.
    my $dateNow = DateTime->today( time_zone => "GMT" );
    my ($y,$m,$d) = $lastRemovalDate =~ /^([0-9]{4})\-([0-9]{2})\-([0-9]{2})\z/
    or die;
    # NOTE(review): indirect-object syntax with a single hashref argument;
    # DateTime->new conventionally takes a flat key/value list — confirm this
    # constructor call actually works as intended.
    my $previousDate = new DateTime({ year => $y, month=> $m, day => $d });
    $previousDate->truncate( to => 'day' );
    my $difference = $dateNow - $previousDate;
    my $format = DateTime::Format::Duration->new(pattern => '%Y %m %e');
    my $duration = $format->format_duration($difference);
    logfile_addLine($log,"duration raw = $duration");
    my ($y,$m,$d) = $duration =~ /^([^\s]*)\s([^\s]*)\s([^\s]*)/;


    logfile_addLine($log,"Duration from last deletes: $dateNow minus $previousDate = $y years, $m months $d days apart");
    # Approximate elapsed days since the last removal sweep (365/30-day math).
    $duration = $y*365;
    $duration+= $m*30;
    $duration+= $d;
    logfile_addLine($log,"Duration = ".$duration);
    $duration = $duration*1;
    my $removeOutput = '';
    if($duration > 30)
    {
        # Walk every non-deleted bib from our bib source created before this
        # run, and ask the API about each 035 item id; anything the vendor no
        # longer returns becomes a "delete" MARC record.
        my $bib_sourceid = getbibsource();
        my $queryDB = "SELECT record,value,(select marc from biblio.record_entry where id=a.record) from metabib.real_full_rec a where
        record in(select id from biblio.record_entry where not deleted and
        source in($bib_sourceid) and create_date < '$startDate') and
        tag='035'";
        updateJob("Processing",$queryDB);
        my @results = @{dbhandler_query($queryDB)};
        foreach(@results)
        {
            my @row = @{$_};
            my $bib = @row[0];
            my $itemID = @row[1];
            # The 035 value looks like "(prefix) id" — keep only the id part.
            $itemID =~ s/^[^\s]*\s([^\s]*)/\1/g;
            logfile_addLine($log,"Item ID = $itemID");
            my $marcxml = @row[2];
            my @checkIfRemoved = @{_getMarcFromCloudlibrary($startDate, $endDate, $itemID)};
            # not found in the cloudlibrary collection, add it to the remove array
            if($#checkIfRemoved == -1)
            {
                # Force leader/09 to 'a' (UTF-8) before parsing the MARCXML.
                $marcxml =~ s/(<leader>.........)./${1}a/;
                my $marcobj = MARC::Record->new_from_xml($marcxml);
                $removeOutput.= convertMARCtoXML($marcobj);;
            }
        }
    }


    my $outputFileName = mobutil_chooseNewFileName($archivefolder,"import$endDate","xml");
    my $outputFileRemove = mobutil_chooseNewFileName($archivefolder,"import$endDate"."remove","xml");

    my @ret = ();

    my $newOutput = '';
    foreach(@newRecords)
    {
        $newOutput .= convertMARCtoXML($_);
    }

    if(length($newOutput) > 0)
    {
        logfile_appendLine($outputFileName,$newOutput);
        # Strip the folder prefix; callers expect archive-relative names.
        $outputFileName =~ s/$archivefolder//;
        push (@ret, $outputFileName);
    }

    if(length($removeOutput) > 0)
    {
        logfile_appendLine($outputFileRemove,$removeOutput);
        $outputFileRemove =~ s/$archivefolder//;
        push (@ret, $outputFileRemove);
    }

    logfile_addLine($log,Dumper(\@ret));
    return \@ret;
}
+
sub _getMarcFromCloudlibrary
{
    # Low-level cloudLibrary API client. Pages through the vendor's MARC feed
    # between $startDate and $endDate (or fetches a single record when a
    # record id is supplied as the 3rd arg), scrubs the response of entities
    # the XML parser cannot digest, and returns a ref to a list of
    # MARC::Record objects.
    my $baseURL = $conf{"server"};
    my $library = $conf{"login"};
    my $apikey = $conf{"password"};
    my $startDate = @_[0];
    my $endDate = @_[1];
    my $recordID = @_[2];
    my $uri = "/cirrus/library/$library/data/marc";

    updateJob("Processing","Starting API connection");
    my $records = 1;
    my $offset = 1;
    my $resultGobLimit = 50;
    my @allRecords = ();
    my $stop = 0;
    while( ($records && !$stop) )
    {
        $records = 0;
        my $query = "?startdate=$startDate&enddate=$endDate&offset=$offset&limit=$resultGobLimit";
        # NOTE(review): this append executes on EVERY loop pass, so a second
        # iteration of a by-id lookup would request ".../marc/$id/$id" —
        # verify the single-record path can never loop more than once.
        $uri .= "/$recordID" if $recordID;
        $query = '' if $recordID;
        my $date = DateTime->now(time_zone => "GMT");

        # Build an RFC-1123-style GMT date string for the request signature.
        my $dayofmonth = $date->day();
        $dayofmonth = '0'.$dayofmonth if length($dayofmonth) == 1;

        my $timezonestring = $date->time_zone_short_name();
        $timezonestring =~ s/UTC/GMT/g;
        my $dateString = $date->day_abbr().", ".
        $dayofmonth." ".
        $date->month_abbr()." ".
        $date->year()." ".
        $date->hms()." ".
        $timezonestring;

        # Sign "date\nGET\npath" with the API key; pad base64 to a multiple of 4.
        my $digest = hmac_sha256_base64($dateString."\nGET\n".$uri.$query, $apikey);
        while (length($digest) % 4) {
            $digest .= '=';
        }

        my $client = REST::Client->new({
            host => $baseURL,
            cert => $cert,
            key => $certKey,
        });
        updateJob("Processing","API query: GET, $uri$query");
        my $answer = $client->request('GET',$uri.$query,'',
        {
            '3mcl-Datetime'=>$dateString,
            '3mcl-Authorization'=>"3MCLAUTH library.$library:".$digest,
            '3mcl-apiversion'=>"3mcl-apiversion: 2.0"
        }
        );
        my $allXML = $answer->responseContent();
        # logfile_addLine($log,$allXML);
        # Flat list of (entity remnant, replacement) pairs the parser chokes on.
        my @unparsableEntities = ("ldquo;","\"","rdquo;","\"","ndash;","-","lsquo;","'","rsquo;","'","mdash;","-","supl;","");
        my $i = 0;
        while(@unparsableEntities[$i])
        {
            my $loops = 0;
            # Replace each occurrence by substring surgery (index/substr).
            while(index($allXML,@unparsableEntities[$i]) != -1)
            {
                my $index = index($allXML,@unparsableEntities[$i]);
                my $find = @unparsableEntities[$i];
                my $findlength = length($find);
                my $first = substr($allXML,0,$index);
                $index+=$findlength;
                my $last = substr($allXML,$index);
                my $rep = @unparsableEntities[$i+1];
                #logfile_addLine($log,"Found at $index and now replacing $find");
                $allXML = $first.$rep.$last;
                # $allXML =~ s/(.*)&?$find(.*)/$1$rep$2/g;
                #logfile_addLine($log,"just replaced\n$allXML");
                $loops++;
                # NOTE(review): this hard-exits the entire script (not just
                # the loop) if replacements run away — intentional?
                exit if $loops > 15;
            }
            $i+=2;
        }
        #logfile_addLine($log,"after scrubbed\n$allXML");
        # Strip the XML declaration and the enclosing marc:collection wrapper.
        $allXML = decode_entities($allXML);
        $allXML =~ s/(.*)<\?xml[^>]*>(.*)/$1$2/;
        $allXML =~ s/(<marc:collection[^>]*>)(.*)/$2/;
        $allXML =~ s/(.*)<\/marc:collection[^>]*>(.*)/$1$2/;
        my @records = split("marc:record",$allXML);
        updateJob("Processing","Received ".$#records." records");
        foreach (@records)
        {
            # Doctor the xml
            my $thisXML = $_;
            $thisXML =~ s/^>//;
            $thisXML =~ s/<\/?$//;
            $thisXML =~ s/<(\/?)marc\:/<$1/g;
            if(length($thisXML) > 0)
            {
                # Strip inter-tag whitespace and control characters, then
                # entityize so MARC::Record can parse the fragment.
                $thisXML =~ s/>\s+</></go;
                $thisXML =~ s/\p{Cc}//go;
                $thisXML = OpenILS::Application::AppUtils->entityize($thisXML);
                $thisXML =~ s/[\x00-\x1f]//go;
                $thisXML =~ s/^\s+//;
                $thisXML =~ s/\s+$//;
                $thisXML =~ s/<record><leader>/<leader>/;
                $thisXML =~ s/<collection/<record/;
                $thisXML =~ s/<\/record><\/collection>/<\/record>/;

                my $record = MARC::Record->new_from_xml("<record>".$thisXML."</record>");
                push (@allRecords, $record);
                $offset++;
            }
        }
        # Keep paging as long as the server returned at least one record.
        $records = 1 if($#records > -1);
        #$stop = 1 if($#allRecords > 200);
        logfile_addLine($log,"records = $records and stop = $stop");
        logfile_addLine($log,"Record Count: ".$#allRecords);
    }
    return \@allRecords;

}
+
sub getMarcFromMarciveHTTPS
{
    # Scrape the MARCIVE HTTPS pickup page (an HTML table of files: name,
    # size, download link) and download every file decideToDownload approves
    # into $archivefolder, skipping files already archived at the same size.
    # Returns a ref to the list of downloaded file names.

    use pQuery;

    if( (length($conf{'server'}) < 4) || (length($conf{'remotefolder'}) < 4) )
    {
        logfile_addLogLine($log,"Marcive settings for 'server' and 'remotefolder' are insufficient: '".$conf{'server'}."' / '".$conf{'remotefolder'}."'");
        exit;
    }
    my $server = $conf{'server'}.'&s='.$conf{'remotefolder'};
    my @ret = ();

    logfile_addLogLine($log,"**********MARCIVE HTTPS starting -> $server");
    my @interestingFiles = ();

    my $rowNum = 0;
    logfile_addLine($log,pQuery->get($server)->content);
    # Walk each table row; column 0 holds the file link, column 1 the size.
    pQuery($server)->find("tr")->each(sub {
        if($rowNum > 0) ## Skipping the title row
        {
            my $i = shift;
            my $row = $_;
            my $colNum = 0;
            my %file = (filename => '', size => '', downloadlink => '');
            pQuery("td",$row)->each(sub {
                shift;
                if($colNum == 0) # filename
                {
                    pQuery("a",$_)->each(sub {
                        my $a_html = pQuery($_)->toHtml;
                        shift;
                        $file{'filename'} = pQuery($_)->text();
                        # Extract the href target from the anchor's HTML.
                        $a_html =~ s/.*?href=['"]([^'"]*)['"].*$/$1/g;
                        $file{'downloadlink'} = $a_html;
                    });
                }
                elsif($colNum == 1)
                {
                    # Size column: first whitespace-separated token.
                    my @t = split(/\s/,pQuery($_)->text());
                    $file{'size'} = @t[0];
                }
                $colNum++;
            });
            push (@interestingFiles, \%file);
        }
        $rowNum++;
    });

    logfile_addLine($log,Dumper(\@interestingFiles)) if $debug;
    foreach(@interestingFiles)
    {
        my %file = %{$_};
        my $filename = $file{'filename'};
        my $download = decideToDownload($filename);

        if($download)
        {
            if(-e "$archivefolder/"."$filename")
            {
                # Already archived: compare sizes to detect a changed file.
                my $size = stat("$archivefolder/"."$filename")->size; #[7];
                my $rsize = $file{'size'};
                logfile_addLine($log,"Local: $size") if $debug;
                logfile_addLine($log,"remot: $rsize") if $debug;
                if($size ne $rsize)
                {
                    logfile_addLine($log,"Local: $size") if $debug;
                    logfile_addLine($log,"remot: $rsize") if $debug;
                    logfile_addLine($log,"$archivefolder/"."$filename differes in size");
                    unlink("$archivefolder/"."$filename");
                }
                else
                {
                    logfile_addLine($log,"skipping $filename");
                    $download=0;
                }
            }
            else
            {
                logfile_addLine($log,"NEW $filename");
            }
            if($download)
            {
                # Ensure the destination directory exists before download.
                my $path = $archivefolder."/".$filename;
                $path = substr($path,0,rindex($path,'/'));
                if(!-d $path)
                {
                    logfile_addLine($log,"$path doesnt exist - creating directory");
                    make_path($path, {
                    verbose => 0,
                    mode => 0755,
                    });
                }
                $path = $archivefolder."/".$filename;
                # LWP::Simple::getstore fetches the link to the archive path.
                getstore($file{'downloadlink'}, $path);
                if(-e $path)
                {
                    push (@ret, "$filename");
                }
                else
                {
                    logfile_addLogLine($log,"Unable to download ".$file{'downloadlink'});
                }
            }
        }
    }
    logfile_addLogLine($log,"********** MARCIVE HTTPS DONE ***************");
    logfile_addLine($log,Dumper(\@ret));
    return \@ret;
}
+
sub ftpRecurse
{
    # Walk the FTP server's current directory, accumulating downloadable file
    # paths. Directory detection is done by attempting a cwd into each entry:
    # success means directory (descend when $recurseFTP is set, then back
    # out), failure means plain file. Returns a ref to the grown list.
    my ($ftpOb, $foundRef) = @_;
    my @found = @{$foundRef};
    # return \@found if($#found > 2);

    foreach my $entry ($ftpOb->ls())
    {
        # logfile_addLine($log,"pwd = ".$ftpOb->pwd." cwd into $entry");
        if ($ftpOb->cwd($ftpOb->pwd . "/" . $entry))
        {
            # cwd succeeded: $entry is a directory.
            @found = @{ftpRecurse($ftpOb, \@found)} if $recurseFTP;
            #logfile_addLine($log,"going to parent dir from = ".$ftpOb->pwd);
            $ftpOb->cdup();
        }
        elsif (decideToDownload($entry))
        {
            # Plain file that passed the filter: record its full remote path.
            push(@found, $ftpOb->pwd . "/" . $entry);
        }
    }
    return \@found;
}
+
sub decideToDownload
{
    # Decide whether a remote/incoming file should be processed, honoring two
    # config knobs:
    #   onlyprocess - whitelist; when set, the name must match one phrase.
    #   ignorefiles - blacklist; a match here suppresses the download.
    # Returns 1 to download, 0 to skip.
    my ($filename) = @_;
    $filename = lc($filename);

    if ($conf{'onlyprocess'}) # This is optional but if present, very restrictive
    {
        unless (compareStringToArray($filename, $conf{'onlyprocess'}))
        {
            logfile_addLogLine($log, "Ignoring file $filename because it didn't contain one of these: " . $conf{'onlyprocess'});
            return 0;
        }
    }

    if (compareStringToArray($filename, $conf{'ignorefiles'}))
    {
        logfile_addLogLine($log, "Ignoring file $filename due to a match in " . $conf{'ignorefiles'});
        return 0;
    }
    return 1;
}
+
sub compareStringToArray
{
    # Case-insensitively test whether $wholeString matches any of the
    # whitespace-separated phrases in $tphrases (each phrase is applied as a
    # regex). Returns 1 on the first match, 0 otherwise — including when no
    # phrase list was supplied at all.
    my ($wholeString, $tphrases) = @_;
    return 0 unless $tphrases;

    $wholeString = lc $wholeString;
    foreach my $phrase (map { lc $_ } split(/\s/, $tphrases))
    {
        return 1 if $wholeString =~ /$phrase/;
    }
    return 0;
}
+
sub add9
{
    # Decorate every qualifying 856 (electronic access) field of $marc:
    #   - skip fields whose subfield 3 says "excerpt", or whose 2nd indicator
    #     is not '0' (i.e. the link is not the resource itself);
    #   - ensure subfield 7 carries $importBIBTagName (added once);
    #   - add one subfield 9 per consortium shortname (global @shortnames)
    #     not already present on the field.
    # Returns the (mutated in place) MARC::Record.
    my $marc = @_[0];
    my @recID = $marc->field('856');
    if(@recID)
    {
        #$marc->delete_fields( @recID );
        for my $rec(0..$#recID)
        {
            #print Dumper(@recID[$rec]);
            for my $t(0.. $#shortnames)
            {
                my @sub3 = @recID[$rec]->subfield( '3' );
                my $ind2 = @recID[$rec]->indicator(2);
                my $ignore=0;
                # "excerpt" links are previews, not the full resource.
                foreach(@sub3)
                {
                    if(lc($_) eq 'excerpt')
                    {
                        $ignore=1;
                    }
                }
                # Indicator 2 of '0' means "the resource itself".
                if($ind2 ne '0')
                {
                    $ignore=1;
                }
                if(!$ignore)
                {
                    # Tag the field with our import-source marker exactly once.
                    my @s7 = @recID[$rec]->subfield( '7' );
                    if($#s7==-1)
                    {
                        @recID[$rec]->add_subfields('7'=>$importBIBTagName);
                    }
                    my @subfields = @recID[$rec]->subfield( '9' );
                    my $shortnameexists=0;
                    for my $subs(0..$#subfields)
                    {
                        #print "Comparing ".@subfields[$subs]. " to ".@shortnames[$t]."\n";
                        if(@subfields[$subs] eq @shortnames[$t])
                        {
                            # print "Same!\n";
                            $shortnameexists=1;
                        }
                    }
                    #print "shortname exists: $shortnameexists\n";
                    if(!$shortnameexists)
                    {
                        #print "adding ".@shortnames[$t]."\n";
                        @recID[$rec]->add_subfields('9'=>@shortnames[$t]);
                    }
                }
            }
        }
    }
    return $marc;
}
+
## All calls to this function have been removed because the database functions seem to babysit this pretty well
sub removeOldCallNumberURI
{
    # (Currently unused — see note above.) Purge the "##URI##" placeholder
    # call numbers for $bibid along with their asset.uri rows and the
    # uri/call-number mappings that reference them.
    # NOTE(review): each DELETE below is issued twice back-to-back — looks
    # like copy/paste; the second pass deletes nothing, but clean it up if
    # this function is ever revived.
    my $bibid = @_[0];

    # Collect the uri ids first so the asset.uri rows can be deleted after
    # the mapping rows are gone.
    my $uriids = '';
    my $query = "select uri from asset.uri_call_number_map WHERE call_number in
    (
    SELECT id from asset.call_number WHERE record = $bibid AND label = \$\$##URI##\$\$
    )";
    updateJob("Processing","$query");
    my @results = @{dbhandler_query($query)};
    foreach(@results)
    {
        my @row = @{$_};
        $uriids.=@row[0].",";
    }
    # Trim the trailing comma from the id list.
    $uriids = substr($uriids,0,-1);

    my $query = "
    DELETE FROM asset.uri_call_number_map WHERE call_number in
    (
    SELECT id from asset.call_number WHERE record = $bibid AND label = \$\$##URI##\$\$
    )
    ";
    updateJob("Processing","$query");
    dbhandler_update($query);
    $query = "
    DELETE FROM asset.uri_call_number_map WHERE call_number in
    (
    SELECT id from asset.call_number WHERE record = $bibid AND label = \$\$##URI##\$\$
    )";
    updateJob("Processing","$query");
    dbhandler_update($query);

    if(length($uriids) > 0)
    {
        $query = "DELETE FROM asset.uri WHERE id in ($uriids)";
        updateJob("Processing","$query");
        dbhandler_update($query);
    }
    $query = "
    DELETE FROM asset.call_number WHERE record = $bibid AND label = \$\$##URI##\$\$
    ";
    updateJob("Processing","$query");
    dbhandler_update($query);
    $query = "
    DELETE FROM asset.call_number WHERE record = $bibid AND label = \$\$##URI##\$\$
    ";
    updateJob("Processing","$query");
    dbhandler_update($query);

}
+
sub recordAssetCopyMove
{
    # Record (in bib_magic.item_reassignment) every non-deleted physical copy
    # still attached to $oldbib, noting it should live on $newbib. Called
    # after dedupe merges to flag copies that remained on the losing bib.
    # Args: old bib id, new bib id, match description (unused here but part
    # of the call signature), import_status row id.
    my ($oldbib, $newbib, $eMatchString, $statusID) = @_;
    # BUG FIX: $statusID was previously read from @_[2] — the same slot as
    # $eMatchString — so the match string, not the status row id, went into
    # the statusid column. Callers (see mergeBIBs) pass the status id as the
    # 4th argument.

    my $query = "select id from asset.copy where call_number in(select id from asset.call_number where record in($oldbib) and label!=\$\$##URI##\$\$) and not deleted";
    my @cids;
    foreach my $row (@{dbhandler_query($query)})
    {
        push(@cids, $row->[0]);
    }

    foreach my $copyid (@cids)
    {
        print "There were asset.copies on $oldbib even after attempting to put them on a deduped bib\n";
        logfile_addLine($log,"\t$oldbib\tContained physical Items");
        $query = "
        INSERT INTO bib_magic.item_reassignment(copy,prev_bib,target_bib,statusid,job)
        VALUES ($copyid,$oldbib,$newbib,$statusID, $jobid)";
        logfile_addLine($log,"$query");
        updateJob("Processing","recordAssetCopyMove $query");
        dbhandler_update($query);
    }
}
+
sub recordBIBMARCChanges
{
    # Persist a before/after snapshot of a bib's MARC for this job in
    # bib_magic.bib_marc_update, flagging whether the bib is brand new.
    my ($bibID, $oldMARC, $newMARC, $newRecord) = @_;

    my $sql = "
    INSERT INTO bib_magic.bib_marc_update(record,prev_marc,changed_marc,job,new_record)
    VALUES (\$1,\$2,\$3,\$4,\$5)";
    dbhandler_updateWithParameters($sql, [$bibID, $oldMARC, $newMARC, $jobid, $newRecord]);
}
+
sub mergeBIBs
{
    # Merge $oldbib (loser) into $newbib (winner): flag straggler copies,
    # record the merge in bib_magic.bib_merge, then move call numbers/URIs/
    # holds over via asset.merge_record_assets.
    # Args: losing bib id, winning bib id, match description, import_status id.
    my ($oldbib, $newbib, $eMatchString, $statusID) = @_;

    # BUG FIX: this guard used assignment — "if ($oldbib = $newbib)" — which
    # clobbered $oldbib and was truthy for any non-zero bib id, so virtually
    # every merge was silently short-circuited. It must be a comparison.
    return 0 if ($oldbib == $newbib); #short circuit if we are merging the same bibs together

    updateJob("Processing","mergeBIBs oldbib: $oldbib newbib=$newbib eMatchString=$eMatchString");
    logfile_addLine($log,"mergeBIBs oldbib: $oldbib newbib=$newbib eMatchString=$eMatchString") if $debug;

    # Note any physical copies still sitting on the losing bib.
    recordAssetCopyMove($oldbib,$newbib,$eMatchString,$statusID);

    my $query = "INSERT INTO bib_magic.bib_merge(leadbib,subbib,statusid,job) VALUES($newbib,$oldbib,$statusID,$jobid)";
    logfile_addLine($log,"MERGE:\t$newbib\t$oldbib") if $debug;
    updateJob("Processing","mergeBIBs $query");
    logfile_addLine($log,$query);
    dbhandler_update($query);

    # Let Evergreen move call numbers, URIs, and holds to the winner.
    $query = "SELECT asset.merge_record_assets($newbib, $oldbib)";
    updateJob("Processing","mergeBIBs $query");
    logfile_addLine($log,$query);
    dbhandler_query($query);
}
+
sub reCalcSha
{
    # Maintenance pass (triggered by --resha): recompute the full and "mid"
    # SHA1 fingerprints for every non-deleted bib attached to our bib source
    # and rewrite bib_magic.bib_sha1 rows whose stored values drifted from
    # the freshly computed ones. Exits the process when finished.
    $bibsourceid = getbibsource();
    my $query = "
    SELECT bre.marc,sha1_full,sha1_mid,bre.id
    FROM
    biblio.record_entry bre
    JOIN bib_magic.bib_sha1 eibs ON (eibs.bib=bre.id and not bre.deleted)
    WHERE
    eibs.bib_source = $bibsourceid";
    logfile_addLine($log,$query);
    my @results = @{dbhandler_query($query)};
    my $count = 0;
    print "Looping " . $#results . " records for sha recalculation\n";
    foreach(@results)
    {
        my @row = @{$_};
        my $marc = @row[0];
        # Force leader/09 to 'a' (UTF-8) before parsing the MARCXML.
        $marc =~ s/(<leader>.........)./${1}a/;
        $marc = MARC::Record->new_from_xml($marc);
        my $shafull_db = @row[1];
        my $shamid_db = @row[2];
        my $id = @row[3];
        # Recompute both variants; the second arg selects the "mid" digest.
        my $shafull_recalc = calcSHA1($marc);
        my $shamid_recalc = calcSHA1($marc,1);
        if( ($shafull_db ne $shafull_recalc) || ($shamid_db ne $shamid_recalc) )
        {
            $query = "UPDATE bib_magic.bib_sha1 SET sha1_full = \$1 , sha1_mid = \$2 WHERE bib = \$3";
            my @vals = ($shafull_recalc, $shamid_recalc, $id);
            logfile_addLine($log,$query."\n". Dumper(\@vals));
            dbhandler_updateWithParameters($query, \@vals);
            undef @vals;
        }
        undef $marc;
        undef $shafull_db;
        undef $shamid_db;
        undef $shafull_recalc;
        undef $shamid_recalc;
        $count++;
        print "$count done\n" if($count % 1000 == 0);
    }

    exit;
}
+
sub calcSHA1
{
    # Compute the de-dupe fingerprint of a MARC record. The digest covers, in
    # order: the first 6 chars of the 007 (or '' when shorter/absent), 245$h,
    # the 001 (skipped when $babySHA requests the shorter "mid" variant),
    # 245$a, 245$b, and 100$a. Returns the hex digest string.
    my ($marc, $babySHA) = @_;

    my $digest = Digest::SHA1->new;
    my $f007 = getsubfield($marc, '007', '');
    $digest->add( length($f007) > 6 ? substr($f007, 0, 6) : '' );
    $digest->add( getsubfield($marc, '245', 'h') );
    $digest->add( getsubfield($marc, '001', '') ) if !$babySHA;
    $digest->add( getsubfield($marc, '245', 'a') );
    $digest->add( getsubfield($marc, '245', 'b') );
    $digest->add( getsubfield($marc, '100', 'a') );
    return $digest->hexdigest;
}
+
sub getsubfield
{
    # Pull a single value out of a MARC record: the raw field data for
    # control fields (numeric tag < 10, i.e. 001-009), or the named subfield
    # for everything else. Returns undef when the field/subfield is absent or
    # the subfield value is false; the result is downgraded to bytes when the
    # scalar carries the UTF-8 flag.
    my ($marc, $tag, $subtag) = @_;

    my $value;
    #print "Extracting $tag $subtag\n";
    my $field = $marc->field($tag);
    if ($field)
    {
        if ($tag < 10)
        {
            # Control fields have no subfields; take the whole data blob.
            $value = $field->data();
        }
        else
        {
            my $sub = $field->subfield($subtag);
            $value = $sub if $sub;
        }
    }
    #print "got $value\n";
    $value = utf8::is_utf8($value) ? Encode::encode_utf8($value) : $value;
    return $value;
}
+
sub removeBibsEvergreen
{
    # Remove Evergreen bibs corresponding to vendor delete records.
    # Two modes:
    #   - $removalsViaMARC true: parse $marcXML, find the one matching bib
    #     (via sha1/001 through findRecord) and remove it;
    #   - otherwise: $title carries an ISBN; every bib matching that ISBN is
    #     removed.
    # The bib_magic.import_status row is updated either way.
    my $statusID = @_[0];
    my $title = @_[1]; # will be filled in with the ISBN number if it's a removal via ISBN
    my $z01 = @_[2];
    my $sha1 = @_[3];
    my $marcXML = @_[4];
    my $removalsViaMARC = @_[5];

    print "removeBibsEvergreen removalsViaMARC = $removalsViaMARC\n" if $debug;
    my $query;

    my $r =0;
    my $removed = 0;
    my $loops = 0;

    updateJob("Processing","removeBibsEvergreen");

    #print "Working on removeBibsEvergreen\n";

    if( $removalsViaMARC )
    {
        my $marc = $marcXML;
        # Force leader/09 to 'a' (UTF-8) before parsing the MARCXML.
        $marc =~ s/(<leader>.........)./${1}a/;
        $marc = MARC::Record->new_from_xml($marc);
        my $bibid = findRecord($marc, $sha1, $z01);
        if($bibid!=-1) #already exists
        {
            # NOTE(review): this branch passes a scalar bib id while the ISBN
            # branch below passes an arrayref-of-arrayrefs — confirm
            # attemptRemoveBibs handles both shapes.
            attemptRemoveBibs($bibid, $statusID);
        }
        else
        {
            $query = "update bib_magic.import_status set status = \$1 , processed = true, row_change_time = now() where id = \$2";
            my @values = ('No matching bib in DB', $statusID);
            dbhandler_updateWithParameters($query,\@values);
        }
        $loops++;
    }
    else
    {
        my @marcOutputRecordsRemove = ();
        logfile_addLine($log,"Removing bibs via ISBN");
        # Each entry is [bib id, MARC::Record] per findMatchingISBN.
        my @ids = @{findMatchingISBN($title)};
        push(@marcOutputRecordsRemove, @ids);
        foreach(@marcOutputRecordsRemove)
        {
            my $bib = $_;
            my @temp = @{$bib};
            my $marc = @temp[1];
            my $title = getsubfield($marc,'245','a');
            logfile_addLine($log,"Removing $title ID=".@temp[0]);
            my @pass = ([@{$bib}]);
            attemptRemoveBibs(\@pass, $statusID);
            $loops++;
        }
        if($#marcOutputRecordsRemove == -1)
        {
            $query = "update bib_magic.import_status set processed = true , status = \$1 , row_change_time = now() where id = \$2";
            my @values = ("No matching bib in DB", $statusID);
            dbhandler_updateWithParameters($query,\@values);
        }
    }
}
+
sub importMARCintoEvergreen
{
    # Import one vendor MARC record. If a matching bib already exists (via
    # sha1/001 through findRecord) the record is handed to
    # chooseWinnerAndDeleteRest for update/merge; otherwise a brand-new
    # biblio.record_entry row is inserted (unless conf do_not_import_new).
    # Progress is tracked in bib_magic.import_status.
    my $statusID = @_[0];
    my $title = @_[1];
    my $z01 = @_[2];
    my $sha1 = @_[3];
    my $marcXML = @_[4];

    my $query;

    updateJob("Processing","importMARCintoEvergreen");
    my $marc = $marcXML;
    # Force leader/09 to 'a' (UTF-8) before parsing the MARCXML.
    $marc =~ s/(<leader>.........)./${1}a/;
    $marc = MARC::Record->new_from_xml($marc);

    $query = "update bib_magic.import_status set status = \$\$processing\$\$, row_change_time = now() where id=$statusID";
    my @vals = ();
    dbhandler_updateWithParameters($query,\@vals);

    updateJob("Processing","updating 245h and 856z");
    $marc = readyMARCForInsertIntoDB($marc);
    # NOTE(review): duplicate "my $bibid" — the first declaration is
    # immediately masked by the second ("masks earlier declaration" warning).
    my $bibid=-1;
    my $bibid = findRecord($marc, $sha1, $z01);

    if($bibid!=-1) #already exists so update the marc
    {
        chooseWinnerAndDeleteRest($bibid, $sha1, $marc, $title, $statusID);
    }
    elsif( !$conf{'do_not_import_new'} ) ##need to insert new bib instead of update
    {

        my $starttime = time;
        my $max = getEvergreenMax();
        my $thisXML = convertMARCtoXML($marc);
        my @values = ($thisXML);
        # The tcn_source carries "<tag>-script <sha1>" so the new row can be
        # located again below even if ids advanced concurrently.
        $query = "INSERT INTO BIBLIO.RECORD_ENTRY(fingerprint,last_xact_id,marc,quality,source,tcn_source,owner,share_depth) VALUES(null,'IMPORT-$starttime',\$1,null,$bibsourceid,\$\$$importBIBTagNameDB-script $sha1\$\$,null,null)";
        logfile_addLine($log,$query);
        my $res = dbhandler_updateWithParameters($query,\@values);
        #print "$res";
        my $newmax = getEvergreenMax("$importBIBTagNameDB-script $sha1", $max); # Get a more accurate ID in case the DB is busy right now
        if( ($newmax != $max) && ($newmax > 0) )
        {
            logfile_addLine($log,"$newmax\thttp://$domainname/eg/opac/record/$newmax?locg=1;expand=marchtml#marchtml");
            $query = "update bib_magic.import_status set status = \$1 , bib = \$2 , processed = true, row_change_time = now() where id = \$3";
            @values = ('inserted', $newmax, $statusID);
            dbhandler_updateWithParameters($query,\@values);
            updateDBSHA1($sha1, $newmax, $bibsourceid);
        }
        else
        {
            # id did not advance: the insert evidently failed.
            $query = "update bib_magic.import_status set status = \$1 , processed = true, row_change_time = now() where id = \$2";
            @values = ('failed to insert', $statusID);
            dbhandler_updateWithParameters($query,\@values);
        }
        undef $starttime;
        undef $max;
        undef $thisXML;
        undef @values;
        undef $newmax;
    }
    else
    {
        logfile_addLine($log,"Skipping $statusID because it didn't match anything and script is configure to NOT IMPORT");
        $query = "update bib_magic.import_status set status = \$1 , bib = \$2 , processed = true, row_change_time = now() where id = \$3";
        my @values = ('skipped', -1, $statusID);
        dbhandler_updateWithParameters($query,\@values);
    }
    undef $statusID;
    undef $title;
    undef $z01;
    undef $sha1;
    undef $statusID;
    undef $marcXML;
    undef $query;
    undef $marc;
}
+
sub importAuthority
{
    # Load one authority MARC file into Evergreen by driving the external
    # eg_staged_bib_overlay tool through its fixed pipeline of stages.
    #
    # Args:    $_[0] - filename relative to $archivefolder.
    # Returns: 0 when no bib_magic.import_status row exists for this job/file
    #          (plays it safe and does nothing); otherwise result of final update.
    my $inputFile = $_[0];

    updateJob("Processing","importAUTHORITYintoEvergreen");

    # Locate the tracking row for this file within the current job.
    my $rowID = 0;
    my $query = "select id from bib_magic.import_status where job = $jobid and filename = \$\$$inputFile\$\$";
    my @results = @{dbhandler_query($query)};
    foreach(@results)
    {
        my @row = @{$_};
        $rowID = $row[0];
        last;
    }
    return 0 if $rowID == 0; # This should exist, if not, play it safe and exit

    $inputFile = "$archivefolder/$inputFile";

    # Increase batch ID based upon how many came before
    # (reusing @results here; the original re-declared it with a masking "my").
    my $batchID = 0;
    $query = "select count(*) from bib_magic.import_status where job=$jobid and status=\$\$finished\$\$";
    logfile_addLogLine($log,$query);
    @results = @{dbhandler_query($query)};
    foreach(@results)
    {
        my @row = @{$_};
        $batchID = $row[0];
    }

    # Batch names already used by this job. NOTE(review): the marc_xml column
    # doubles as batch-name storage for rows of type 'importauth'.
    my @previousBatchNames = ();
    $query = "select marc_xml from bib_magic.import_status where job=$jobid and type=\$\$importauth\$\$";
    logfile_addLogLine($log,$query);
    @results = @{dbhandler_query($query)};
    foreach(@results)
    {
        my @row = @{$_};
        push @previousBatchNames, $row[0];
    }

    my $fullBatchName = "auth$jobid" . "_$batchID";

    # Keep appending '_0' until the name collides with no previously used name.
    my $alreadyused = 1;
    while($alreadyused)
    {
        $alreadyused = 0;
        foreach(@previousBatchNames)
        {
            $alreadyused = 1 if($_ eq $fullBatchName);
            $fullBatchName.='_0' if($_ eq $fullBatchName);
        }
    }

    # Record the chosen batch name on our tracking row.
    $query = "update bib_magic.import_status set marc_xml = \$1 , row_change_time = now() where id = \$2";
    my @values = ($fullBatchName, $rowID);
    dbhandler_updateWithParameters($query,\@values);

    # we are going to use the eg_staged_bib_overlay tool to import the authority records. This tool needs to be available in the directory specified in the config file
    my $bashOutputFile = $conf{"tempspace"}."/authload$jobid";
    my $execScript = $conf{"eg_staged_bib_overlay_dir"}."/eg_staged_bib_overlay";

    # Shared database arguments for every stage invocation.
    # NOTE(review): the db password is passed on the command line and is
    # therefore visible in the process list — confirm this is acceptable here.
    my $dbArgs = "--db ".$conf{"db"}." --dbuser ".$conf{"dbuser"}." --dbhost ".$conf{"dbhost"}." --dbpw ".$conf{"dbpass"};

    # Pipeline stages, in order. The first stage consumes the input file and
    # truncates the output log; every later stage appends to it.
    my @stages = (
        'stage_auths',
        'match_auths',
        'load_new_auths',
        'overlay_auths_stage1',
        'overlay_auths_stage2',
        'overlay_auths_stage3',
        'link_auth_auth',
    );

    my $firstStage = 1;
    foreach my $action (@stages)
    {
        # Surface progress in the tracking row before each stage runs.
        $query = "update bib_magic.import_status set status = \$1 , row_change_time = now() where id = \$2";
        @values = ("running $action", $rowID);
        dbhandler_updateWithParameters($query,\@values);

        my $cmd = "$execScript --schema auth_load --batch $fullBatchName $dbArgs --action $action";
        $cmd .= $firstStage ? " $inputFile > $bashOutputFile" : " >> $bashOutputFile";
        logfile_addLogLine($log,$cmd);
        system($cmd);
        $firstStage = 0;
    }

    # Mark the row finished and processed.
    $query = "update bib_magic.import_status set status = \$1 , processed = true, row_change_time = now() where id = \$2";
    @values = ('finished', $rowID);
    dbhandler_updateWithParameters($query,\@values);
}
+
sub attemptRemoveBibs
{
 # Delete, or strip our 856 $9 tokens from, each candidate bib in the list.
 # @_[0]: arrayref of [bib_id, MARC::Record, score, marcxml] tuples.
 # @_[1]: bib_magic.import_status row id; per-bib outcomes are appended to its status column.
 my @list = @{@_[0]};
 my $statusID = @_[1];

 # Reset the status column to be blank as we loop through all of the related bibs, appending results to status
 my $query = "update bib_magic.import_status set status = \$\$\$\$ , row_change_time = now() where id = \$1";
 my @values = ($statusID);
 dbhandler_updateWithParameters($query,\@values);
 foreach(@list)
 {
 my @attrs = @{$_};
 my $id = @attrs[0];
 my $marcobj = @attrs[1];
 my $score = @attrs[2];
 my $marcxml = @attrs[3];
 # decideToDeleteOrRemove9 returns '1' when every 856 $9 on the record belongs
 # to this group (delete the whole bib), otherwise a MARC::Record with only
 # our $9s removed (keep the bib, just update its MARC).
 my $answer = decideToDeleteOrRemove9($marcobj);
 if( $answer eq '1' )
 {
 # Refuse to delete a bib that still has live (non-deleted) copies attached.
 my $query = "SELECT ID,BARCODE FROM ASSET.COPY WHERE CALL_NUMBER IN
 (SELECT ID FROM ASSET.CALL_NUMBER WHERE RECORD=$id) AND NOT DELETED";
 logfile_addLine($log,$query);
 my @results = @{dbhandler_query($query)};
 if($#results > -1) # There are non-deleted copies attached
 {
 $query = "update bib_magic.import_status set status = status || \$\$[ $id - \$\$ || \$1 || \$\$]\$\$ , bib = \$2 , row_change_time = now() where id = \$3";
 my @values = ('failed to removed bib due to copies attached', $id, $statusID);
 dbhandler_updateWithParameters($query,\@values);
 }
 if($#results == -1)
 {
 # Soft delete: flip the DELETED flag rather than removing the row.
 my $query = "UPDATE BIBLIO.RECORD_ENTRY SET DELETED=\$\$t\$\$ WHERE ID = \$1";
 logfile_addLine($log,$query);
 my @values = ($id);
 my $res = dbhandler_updateWithParameters($query,\@values);
 if($res)
 {
 $query = "update bib_magic.import_status set status = status || \$\$[ $id - \$\$ || \$1 || \$\$]\$\$ , bib = \$2 , row_change_time = now() where id = \$3";
 @values = ('removed bib', $id, $statusID);
 dbhandler_updateWithParameters($query,\@values);
 }
 else
 {
 $query = "update bib_magic.import_status set status = status || \$\$[ $id - \$\$ || \$1 || \$\$]\$\$ , bib = \$2 , row_change_time = now() where id = \$3";
 @values = ('failed to removed bib', $id, $statusID);
 dbhandler_updateWithParameters($query,\@values);
 }
 }
 }
 else
 {
 # Keep the bib: write back the MARC with our 856 $9s removed, and record
 # the before/after XML for audit via recordBIBMARCChanges.
 my $finalMARCXML = convertMARCtoXML($answer);
 recordBIBMARCChanges($id, $marcxml, $finalMARCXML,0);
 my @values = ($finalMARCXML);
 my $query = "UPDATE BIBLIO.RECORD_ENTRY SET marc=\$1 WHERE ID=$id";

 updateJob("Processing","chooseWinnerAndDeleteRest $query");

 logfile_addLine($log,$query);
 logfile_addLine($log,"$id\thttp://$domainname/eg/opac/record/$id?locg=1;expand=marchtml#marchtml\thttp://$domainname/eg/opac/record/$id?locg=1;expand=marchtml#marchtml\t0");
 my $res = dbhandler_updateWithParameters($query,\@values);
 if($res)
 {
 $query = "update bib_magic.import_status set status = status || \$\$[ $id - \$\$ || \$1 || \$\$]\$\$ , bib = \$2 , row_change_time = now() where id = \$3";
 @values = ('removed related 856', $id, $statusID);
 dbhandler_updateWithParameters($query,\@values);
 }
 else
 {
 $query = "update bib_magic.import_status set status = status || \$\$[ $id - \$\$ || \$1 || \$\$]\$\$ , bib = \$2 , row_change_time = now() where id = \$3";
 @values = ('failed to remove related 856', $id, $statusID);
 dbhandler_updateWithParameters($query,\@values);
 }
 }
 }
 # All bibs handled: mark the tracking row processed.
 $query = "update bib_magic.import_status set processed = true , row_change_time = now() where id = \$1";
 @values = ($statusID);
 dbhandler_updateWithParameters($query,\@values);
}
+
sub decideToDeleteOrRemove9
{
 # Examine every 856 (ind2 = '0') on the record and remove the $9 subfields
 # that match one of our @shortnames (library short codes).
 # Returns the literal 1 when ALL $9s on the record belonged to us (caller
 # should delete the whole bib), otherwise the modified MARC::Record.
 my $marc = @_[0];
 my @eight56s = $marc->field("856");
 my @eights;
 my $original856 = $#eight56s + 1;

 my %urls;
 my $nonMatch = 0;
 foreach(@eight56s)
 {
 my $thisField = $_;
 my $ind2 = $thisField->indicator(2);
 if($ind2 eq '0')
 {
 my @ninposes = ();
 my $poses=0;
 #deleting subfields requires knowledge of what position among all of the subfields they reside.
 #so we have to record at what positions each of the 9's are ahead of time.
 foreach($thisField->subfields())
 {
 my @f = @{$_};
 if(@f[0] eq '9')
 {
 push (@ninposes, $poses);
 }
 $poses++;
 }
 # @nines (the $9 values) and @ninposes (their positions) are parallel arrays.
 my @nines = $thisField->subfield("9");
 my @delete9s = ();
 my $ninePos = 0;
 my $nonMatchThis856 = 0;
 foreach(@nines)
 {
 my $looking = $_;
 my $found = 0;
 foreach(@shortnames)
 {
 if($looking eq $_)
 {
 $found=1;
 }
 }
 if($found)
 {
 push(@delete9s, @ninposes[$ninePos]);
 }
 else
 {
 # A $9 belonging to some other group: the record must survive.
 $nonMatch=1;
 $nonMatchThis856 = 1;
 }
 $ninePos++;
 }
 if(!$nonMatchThis856)
 {
 #Just delete the whole field because it only contains these 9's
 $marc->delete_field($thisField);
 }
 else
 {
 #Some of the 9's do not belong to this group, so we just want to delete ours
 #and preserve the record
 $thisField->delete_subfield(code=> '9', 'pos' => \@delete9s) if ($#delete9s > -1);
 }
 undef @ninposes;
 undef @delete9s;
 undef $nonMatchThis856;
 }

 }
 if(!$nonMatch) #all of the 9s on this record belong to this group and no one else, time to delete the record
 {
 return 1;
 }
 #There were some 9s for other groups, just remove ours and preserve the record
 return $marc;

}
+
sub findMatchingISBN
{
    # Find candidate bibs previously imported by this script that match the
    # given ISBN (via the metabib identifier index).
    #
    # Args:    $_[0] - ISBN string, used verbatim in to_tsquery().
    # Returns: arrayref of [bib_id, MARC::Record, score(0), marcxml] tuples.
    #
    # NOTE(review): $isbn is interpolated inside $$...$$ dollar-quoting; safe
    # only if it cannot contain the '$$' sequence — confirm upstream sanitizing.
    my $isbn = $_[0];
    my @ret = ();

    my $query = "SELECT ID,MARC FROM BIBLIO.RECORD_ENTRY WHERE
    tcn_source~\$\$$importBIBTagNameDB-script\$\$ AND
    source=$bibsourceid AND
    NOT DELETED AND
    ID IN(
    select source from metabib.identifier_field_entry
    WHERE
    index_vector @@ to_tsquery(\$\$$isbn\$\$)
    )
    ";
    # logfile_addLine($log,$query);
    foreach my $row (@{dbhandler_query($query)})
    {
        my ($id, $marcxml) = @{$row}[0, 1];
        # Force leader position 09 to 'a' (UTF-8) so MARC::Record parses the
        # XML with the right encoding flag. Work on a copy; keep the original
        # XML in the tuple. (Fix: the original code declared $marcobj twice
        # with a masking "my" in the same scope.)
        (my $fixedXML = $marcxml) =~ s/(<leader>.........)./${1}a/;
        my $marcobj = MARC::Record->new_from_xml($fixedXML);
        push (@ret, [$id, $marcobj, 0, $marcxml]);
    }
    return \@ret;
}
+
sub chooseWinnerAndDeleteRest
{
 # Given a list of candidate matching bibs, pick the highest-scoring one as
 # the "winner", merge the others into it (harvesting their 856s), then
 # overlay the winner with the incoming MARC and record the outcome on the
 # bib_magic.import_status row.
 # @_[0]: arrayref of [bib_id, MARC::Record, score, marcxml] tuples.
 # @_[1]: sha1 fingerprint string (possibly "full mid" pair, space separated).
 # @_[2]: incoming MARC::Record. @_[3]: title (unused here). @_[4]: status row id.
 my @list = @{@_[0]};
 my $sha1 = @_[1];
 my $newMarc = @_[2];
 my $title = @_[3];
 my $statusID = @_[4];
 my $chosenWinner = 0;
 my $bestScore=0;
 my $finalMARC;
 my $i=0;
 my $winnerBibID;
 my $winnerOGMARCxml;
 my $matchnum = $#list+1;
 my $eMatchString = $newMarc->field('001') ? $newMarc->field('001')->data() : '';
 # Pass 1: find the highest-scoring candidate; ties keep the earliest.
 foreach(@list)
 {
 my @attrs = @{$_};
 my $id = @attrs[0];
 my $score = @attrs[2];
 my $marcxml = @attrs[3];
 if($score>$bestScore)
 {
 $bestScore=$score;
 $chosenWinner=$i;
 $winnerBibID = $id;
 $winnerOGMARCxml = $marcxml;
 }
 $i++;
 undef @attrs;
 undef $id;
 undef $score;
 undef $marcxml;
 }

 if($conf{'import_as_is'})
 {
 # import_as_is: the incoming record wins untouched, no merging.
 $finalMARC = $newMarc;
 }
 else
 {
 $finalMARC = @{@list[$chosenWinner]}[1];
 $i=0;
 #
 # This loop is merging all of the existing bibs in the database into the winning bib ID (in the database)
 # And before it merges them, it soaks up the 856's from the about-to-be deleted bib into $finalMARC
 #
 foreach(@list)
 {
 my @attrs = @{$_};
 my $id = @attrs[0];
 my $marc = @attrs[1];
 my $marcxml = @attrs[3];
 if($i!=$chosenWinner)
 {
 $finalMARC = mergeMARC856($finalMARC, $marc);
 mergeBIBs($id, $winnerBibID, $eMatchString, $statusID);
 }
 $i++;
 undef @attrs;
 undef $id;
 undef $marc;
 undef $marcxml;
 }
 # Marc manipulations need to be ran upon the target bib in the DB as well.
 $finalMARC = readyMARCForInsertIntoDB($finalMARC);

 # here we prefer the incoming file MARC as "the" marc, but we need the gathered 856's.
 # which is why it's passed as the second argument
 # At this point, the 9's have been added to the newMarc (data from e_bib_import)
 $finalMARC = mergeMARC856($newMarc, $finalMARC);
 }

 my $newmarcforrecord = convertMARCtoXML($finalMARC);
 print "Headed into recordBIBMARCChanges\n" if $debug;
 recordBIBMARCChanges($winnerBibID, $winnerOGMARCxml, $newmarcforrecord, 0);

 my $thisXML = convertMARCtoXML($finalMARC);
 my @values = ();
 my $query = "";
 # Build the overlay UPDATE; tcn_source_authority mode also stamps the sha1
 # into tcn_source so later runs can match on it.
 if($conf{'tcn_source_authority'})
 {
 $query = "UPDATE BIBLIO.RECORD_ENTRY SET tcn_source = \$1 , source = \$2 , marc = \$3 WHERE ID = \$4";
 @values = ("$importBIBTagNameDB-script $sha1", $bibsourceid, $thisXML, $winnerBibID);
 }
 else
 {
 $query = "UPDATE BIBLIO.RECORD_ENTRY SET source = \$1 , marc = \$2 WHERE ID = \$3";
 @values = ($bibsourceid, $thisXML, $winnerBibID);
 }

 $query = "UPDATE BIBLIO.RECORD_ENTRY SET marc = \$1 WHERE ID = \$2" if $conf{'import_as_is'}; # do not update the source if as is
 @values = ($thisXML, $winnerBibID) if $conf{'import_as_is'}; # do not update the source if as is

 print "Updating MARC XML in DB BIB $winnerBibID\n" if $debug;
 updateJob("Processing","chooseWinnerAndDeleteRest $query");

 # logfile_addLine($log,$thisXML);
 logfile_addLine($log,"$winnerBibID\thttp://$domainname/eg/opac/record/$winnerBibID?locg=1;expand=marchtml#marchtml\thttp://$domainname/eg/opac/record/$winnerBibID?locg=1;expand=marchtml#marchtml\t$matchnum");
 my $res = dbhandler_updateWithParameters($query,\@values);
 #print "$res\n";
 if($res)
 {
 $query = "update bib_magic.import_status set status = \$1, processed = true , bib = \$2 , row_change_time = now() where id = \$3";
 my @vals = ('matched and overlayed',$winnerBibID,$statusID);
 dbhandler_updateWithParameters($query,\@vals);
 if($conf{'tcn_source_authority'})
 {
 # Keep the bib_sha1 fingerprint table in sync: index 0 is sha1_full,
 # index 1 is sha1_mid; only update when the stored value differs.
 my @shas = split(/\s/,$sha1);
 for my $i (0..$#shas)
 {
 my $shacol = "sha1_full";
 $shacol = "sha1_mid" if ($i == 1);
 $query = "UPDATE bib_magic.bib_sha1 SET $shacol = \$1 WHERE bib = \$2 AND bib_source = \$3 AND $shacol != \$4";
 @vals = (@shas[$i], $winnerBibID, $bibsourceid, @shas[$i]);
 dbhandler_updateWithParameters($query,\@vals);
 }
 undef @shas;
 }
 }
 else
 {
 $query = "update bib_magic.import_status set status = \$1, processed = true, row_change_time = now() where id = \$2";
 my @vals = ('failed',$statusID);
 dbhandler_updateWithParameters($query,\@vals);
 }

 undef @list;
 undef $sha1;
 undef $newMarc;
 undef $title;
 undef $statusID;
 undef $chosenWinner;
 undef $bestScore;
 undef $finalMARC;
 undef $i;
}
+
sub findRecord
{
 # Search the database for bibs matching the incoming record, by (in order of
 # preference): explicit 901$c bib id, stored sha1 fingerprints, and (deep
 # search mode) a regex match on the record's 001 value inside the MARC XML.
 # @_[0]: incoming MARC::Record. @_[1]: sha1 string ("full mid", space separated).
 # @_[2]: the record's 001 value (may be empty).
 # Returns -1 when nothing matched, otherwise an arrayref of
 # [bib_id, MARC::Record, score, marcxml] tuples (deduped on bib_id).
 my $marcsearch = @_[0];
 my $sha1 = @_[1];
 my $zero01 = @_[2];

 my @ret;
 my $none=1;
 my $foundIDs;
 my $count=0;
 my @shas = split(/\s/,$sha1);
 my $query = "";

 if( $match901c && $marcsearch->subfield('901',"c") )
 {
 # --match_901c mode: trust the 901$c as the literal Evergreen bib id.
 $query = "
 select
 bre.id,
 bre.marc from
 biblio.record_entry bre
 where
 bre.id=" . $marcsearch->subfield('901',"c");
 my $fetch = getMatchingMARC($query, 'sha1');
 if(ref $fetch eq 'ARRAY')
 {
 $none = 0;
 @ret = @{dedupeMatchArray(\@ret, $fetch)};
 }
 }
 else
 {
 # Try each fingerprint column: index 0 -> sha1_full, index 1 -> sha1_mid.
 for my $i (0..$#shas)
 {
 my $shacol = "sha1_full";
 $shacol = "sha1_mid" if $i == 1;

 print "Searching for sha1 match @shas[$i]\n" if $debug;
 $query = "
 select
 bre.id,
 bre.marc from
 biblio.record_entry bre,
 bib_magic.bib_sha1 ebs
 where
 bre.id=ebs.bib and
 ebs.$shacol = \$sha\$@shas[$i]\$sha\$ and
 ebs.bib_source=$bibsourceid and
 not bre.deleted and
 bre.id > -1
 ";

 # Deep search also catches bibs whose sha1 lives only in tcn_source
 # (i.e. rows missing from bib_magic.bib_sha1).
 $query.="
 union all

 select bre.id,bre.marc
 from
 biblio.record_entry bre left join bib_magic.bib_sha1 ebs on(ebs.bib=bre.id)
 where
 not bre.deleted and
 bre.id > -1 and
 bre.source=$bibsourceid and
 bre.tcn_source~\$sha\$@shas[$i]\$sha\$ and
 ebs.bib is null
 " if $searchDeepMatch;

 my $fetch = getMatchingMARC($query, 'sha1');
 if(ref $fetch eq 'ARRAY')
 {
 $none = 0;
 @ret = @{dedupeMatchArray(\@ret, $fetch)};
 }
 }
 }

 # Collect the ids found so far so the 001 search below can exclude them.
 foreach(@ret)
 {
 my $row = $_;
 my @row = @{$row};
 my $id = @row[0];
 $foundIDs.="$id,";
 $none=0;
 $count++;
 }


 if($zero01)
 {
 # fail safe, so that we don't match a huge number of marc records based upon a super tiny 001
 # We are requiring at least 6 non-whitespace characters to appear in the 001 for matching
 my $z01Check = $zero01;
 $z01Check =~ s/[\s\t]//g;
 if($searchDeepMatch && length($z01Check) > 5) ## This matches other bibs based upon the vendor's 001 which is usually moved to the 035, hence MARC ~
 {
 # Strip the trailing comma from the id list; use -1 when it's empty.
 $foundIDs = substr($foundIDs,0,-1);
 if(length($foundIDs)<1)
 {
 $foundIDs="-1";
 }
 my $query = "SELECT id,marc FROM biblio.record_entry WHERE marc ~ \$\$$zero01\$\$ AND id NOT IN($foundIDs) AND deleted IS FALSE ";
 my $fetch = getMatchingMARC($query, '001');
 if(ref $fetch eq 'ARRAY')
 {
 $none = 0;
 # Count only the rows the dedupe actually added.
 my $c = $#ret;
 @ret = @{dedupeMatchArray(\@ret, $fetch)};
 $c = $#ret - $c;
 $count += $c;
 }
 }
 }

 if($none)
 {
 print "Didn't find one\n" if $debug;
 return -1;
 }
 print "Count matches: $count\n" if $debug;
 updateJob("Processing","Count matches: $count");

 return \@ret;
}
+
sub dedupeMatchArray
{
    # Merge two match lists of [bib_id, marc, score, xml] tuples, dropping any
    # incoming entry whose bib_id already appears in the established list.
    # If either argument is not an arrayref, the established argument is
    # returned untouched. Returns an arrayref (a fresh copy when merged).
    my ($establishedArr, $incArray) = @_;

    return $establishedArr
        unless (ref $incArray eq 'ARRAY') && (ref $establishedArr eq 'ARRAY');

    my @merged = @{$establishedArr};
    my %seen = map { $_->[0] => 1 } @merged;

    foreach my $candidate (@{$incArray})
    {
        my @tuple = @{$candidate};
        next if $seen{ $tuple[0] };
        $seen{ $tuple[0] } = 1;
        push @merged, [@tuple];    # shallow copy, matching the original
    }

    return \@merged;
}
+
sub getMatchingMARC
{
 # Run a query expected to return (id, marcxml) rows, parse each row's MARC
 # and score it, returning match tuples for the dedupe/choose-winner code.
 # $type is only a label used in debug output ('sha1' or '001').
 # Returns arrayref of [id, MARC::Record, score, original_marcxml], or 0 when
 # the query matched nothing (callers test "ref ... eq 'ARRAY'").
 my $query = shift;
 my $type = shift;
 my @ret = ();
 updateJob("Processing","$query");
 my @results = @{dbhandler_query($query)};
 foreach(@results)
 {
 my $row = $_;
 my @row = @{$row};
 my $id = @row[0];
 print "found matching $type: $id\n" if $debug;
 my $marc = @row[1];
 # Force leader/09 to 'a' (UTF-8) on a copy before parsing; the untouched
 # XML is kept in the tuple for later change tracking.
 my $prevmarc = $marc;
 $prevmarc =~ s/(<leader>.........)./${1}a/;
 $prevmarc = MARC::Record->new_from_xml($prevmarc);
 my $score = scoreMARC($prevmarc,$log);
 my @matched = ($id,$prevmarc,$score,$marc);
 push (@ret, [@matched]);
 }

 return \@ret if $#ret > -1;
 return 0;
}
+
sub readyMARCForInsertIntoDB
{
 # Apply all configured MARC manipulations (%marcEdits, populated by
 # parseMARCEdits) to a record before it is written to the database.
 # No-op when running in import_as_is mode. Returns the (mutated) record.
 my $marc = @_[0];
 return $marc if $conf{'import_as_is'};
 $marc = fixLeader($marc);

 # %marcEdits maps edit type ('add'/'remove'/'replace'/'removesubfield') to an
 # array of edit definitions; each definition is a hash with 'def' (arrayref)
 # and optional 'howmany' (count or 'all', default '1').
 while ( (my $type, my $array) = each(%marcEdits) )
 {
 if( (ref $array eq 'ARRAY') && (@{$array}[0]) )
 {
 foreach(@{$array})
 {
 # print Dumper($_);
 if(ref $_ eq 'HASH')
 {
 my %thisOne = %{$_};
 my @def = @{$thisOne{'def'}};
 my $howmany = $thisOne{'howmany'} || '1';
 $marc = doMARCEdit(\@def, $marc, $type, $howmany);
 }
 }
 }
 }

 return $marc;
}
+
sub doMARCEdit
{
 # Apply one configured edit to a MARC record.
 # $def: arrayref whose layout depends on $type (see parseMARCEdits config):
 #   'remove'          - list of "tag_ind1_ind2_subfield" specs
 #   'removesubfield'  - [tag, subfieldcode]
 #   'add'/'replace'   - control fields: [tag, data, column]; standard fields:
 #                       [tag, ind1, ind2, subfieldcode, value] (per the
 #                       typeExpectedArraySize check in the sanity-check sub).
 # $howmany: how many matching fields to touch, or 'all'.
 # Returns the (mutated) record.
 my $def = shift;
 my $marc = shift;
 my $type = shift;
 my $howmany = shift;

 my @def = @{$def};

 if($type eq 'remove')
 {
 foreach(@def)
 {
 # Spec format: tag_ind1_ind2_subfield (underscore separated).
 my @splits = split(/_/,$_);
 my $fieldDef = shift @splits;
 my $ind1Def = shift @splits;
 my $ind2Def = shift @splits;
 my $subfieldDef = shift @splits;
 my @f = @{findMatchingFields($marc, $fieldDef, $ind1Def, $ind2Def, $subfieldDef)};

 if($#f > -1) # without this, perl adds $#f + 1, it resolves to "1" when $#f = -1
 {
 my $pos = 0;
 $howmany = $#f + 1 if($howmany eq 'all');
 while($pos < $howmany)
 {
 $marc->delete_field(@f[$pos]);
 $pos++;
 }
 }
 }
 }
 elsif($type eq 'removesubfield' && $marc->field(@def[0]))
 {
 print "Deleting subfield ".@def[0]." ".@def[1]."\n" if $debug;
 # Pull ALL occurrences off the record, strip the subfield from the first
 # $howmany of them, then re-insert every field that still has subfields.
 my @field = $marc->field(@def[0]);
 $marc->delete_field(@field);
 my $pos = 0;
 $howmany = $#field + 1 if($howmany eq 'all');
 while($pos < $howmany)
 {
 my $thisField = @field[$pos];

 if($thisField->subfield(@def[1]))
 {
 $thisField->delete_subfield(code => @def[1]);
 }
 # Only re-insert if the field has at least one subfield left.
 my @theRest = $thisField->subfields();
 $marc->insert_grouped_field($thisField) if(@theRest[0]);
 undef @theRest;
 $pos++;
 }
 # Re-insert the untouched remainder beyond $howmany.
 while($pos < $#field + 1)
 {
 my $thisField = @field[$pos];
 $marc->insert_grouped_field($thisField);
 $pos++;
 }
 }
 elsif( ($type eq 'add') || ($type eq 'replace') )
 {
 # Tags < 010 are control fields (data only); >= 010 get indicators/subfields.
 my $numCheck = @def[0] + 0;
 my $field;
 $field = MARC::Field->new(@def[0], '') if($numCheck < 10);
 $field = MARC::Field->new(@def[0],' ',' ', 'a' => '') if($numCheck > 9);
 # The placeholder $a was only needed to satisfy the constructor.
 $field->delete_subfield('a') if($numCheck > 9);

 if($type eq 'replace' && $marc->field(@def[0]))
 {
 my @fields = $marc->field(@def[0]);
 my $pos = 0;
 $howmany = $#fields + 1 if($howmany eq 'all');
 while($pos < $howmany)
 {
 $field = @fields[$pos];
 $marc = doMARCEdit_Field($field, $marc, $numCheck, \@def, 1);
 $pos++;
 }
 }
 else
 {
 $marc = doMARCEdit_Field($field, $marc, $numCheck, \@def, 0);
 }
 }

 return $marc;
}
+
sub doMARCEdit_Field
{
 # Write the configured data into a single field (helper for add/replace).
 # $numCheck < 10 means control field: def = [tag, data, column].
 # Otherwise standard field: def = [tag, ind1, ind2, subfieldcode, value];
 # an indicator spec of 'same' leaves the existing indicator untouched.
 # $alreadyExists: true when $field is already attached to $marc (replace),
 # false when it must be inserted (add).
 my $field = shift;
 my $marc = shift;
 my $numCheck = shift;
 my $def = shift;
 my $alreadyExists = shift || 0;

 my @def = @{$def};
 if($numCheck < 10)
 {
 # Control field: splice def[1] into the data at column def[2].
 my $data = $field->data();
 $data = mobutil_insertDataIntoColumn($data, @def[2], @def[1]);

 $field->update($data);
 $marc->insert_grouped_field($field) if !$alreadyExists;
 }
 else
 {
 # Indicators: @ind[1] = ind1, @ind[2] = ind2 (slot 0 unused so the index
 # lines up with the indicator number).
 my @ind = ('', @def[1], @def[2]);
 for my $i (1..$#ind)
 {
 if(@ind[$i] ne 'same')
 {
 $field->set_indicator($i, @ind[$i]);
 }
 }
 my @subfield = $field->subfield( @def[3] );
 if(@subfield[0])
 {
 # Replace the first occurrence's value with def[4], keeping any
 # additional occurrences of the same subfield code.
 $field->delete_subfields(@def[3]);
 #only deal with the first one
 shift @subfield;
 push (@subfield, @def[4]);
 $field->add_subfields(@def[3] => $_) foreach(@subfield);
 }
 else
 {
 $field->add_subfields(@def[3] => @def[4]);
 }
 $marc->insert_grouped_field($field) if !$alreadyExists;
 }
 return $marc;
}
+
sub findMatchingFields
{
 # Select the fields on $marc with tag $fieldDef whose indicators satisfy
 # $ind1Def/$ind2Def and which contain $subfieldDef (or 'all' subfields).
 # Indicator specs: 'all' matches anything; 'none' matches only blank/undef
 # (but NOT '0'); any other value must equal the indicator exactly.
 # When $subfieldDef is false, all fields with the tag are returned unfiltered.
 # Returns an arrayref of MARC::Field objects.
 my $marc = shift;
 my $fieldDef = shift;
 my $ind1Def = shift;
 my $ind2Def = shift;
 my $subfieldDef = shift;
 my @ret = ();
 my @fields = $marc->field($fieldDef);
 if($subfieldDef)
 {
 foreach(@fields)
 {
 # NOTE(review): keys are the indicator SPECS — if $ind1Def eq $ind2Def
 # (e.g. both 'all') the hash collapses to one entry and only one
 # indicator is actually tested. Confirm whether configs ever rely on
 # identical non-'all' specs for both indicators.
 my %comps = ( $ind1Def => $_->indicator(1), $ind2Def => $_->indicator(2) );
 my $passedIndicatorTests = 1;
 while ( (my $key, my $value) = each(%comps) )
 {
 if($key eq 'none') # Definition means only non-defined or null value indicators are allowed
 {
 $passedIndicatorTests = 0 if($value eq '0'); # handle the case when the indicator is '0' and we defined "none". Not the same :)
 if($value ne '0')
 {
 # Allow blank, backslash, forward slash, whitespace, or undef.
 $passedIndicatorTests = 0 if ($value && !($value =~ m/^[\\\/\s]$/) && ($value ne 'undef'));
 }
 }
 elsif($key ne 'all')
 {
 $passedIndicatorTests = 0 if !($key eq $value);
 }
 }
 if($passedIndicatorTests)
 {
 if($subfieldDef ne 'all')
 {
 my $subfield = $_->subfield($subfieldDef);
 push (@ret, $_) if $subfield;
 }
 else
 {
 push (@ret, $_);
 }
 }
 }
 }
 else
 {
 return \@fields;
 }
 return \@ret;
}
+
sub mergeMARC856
{
 # Merge the 856 (electronic access) fields of $marc2 into $marc, keyed on
 # subfield $u (the URL). For duplicate URLs: keep the first field seen,
 # fill in a missing $z (public note) or $7 from the duplicate, and (when
 # merge_9s is configured) union the $9 tokens of ind2='0' fields.
 # Fields with ind2 != '0' get their $9 and $z stripped.
 # Returns $marc with its 856s replaced by the merged set.
 my $marc = @_[0];
 my $marc2 = @_[1];

 my @eight56s = $marc->field("856");
 my @eight56s_2 = $marc2->field("856");
 my @eights;
 my $original856 = $#eight56s + 1;
 @eight56s = (@eight56s,@eight56s_2);

 my %urls;
 foreach(@eight56s)
 {
 my $thisField = $_;
 my $ind2 = $thisField->indicator(2);
 # Just read the first $u and $z
 my $u = $thisField->subfield("u");
 my $z = $thisField->subfield("z");
 my $s7 = $thisField->subfield("7");

 if($u) #needs to be defined because its the key
 {
 if(!$urls{$u})
 {
 # First time this URL is seen: store it (stripped if ind2 != '0').
 if($ind2 ne '0')
 {
 $thisField->delete_subfields('9');
 $thisField->delete_subfields('z');
 }
 $urls{$u} = $thisField;
 }
 else
 {
 # Duplicate URL: merge this field's extras into the stored one.
 my @nines = $thisField->subfield("9");
 my $otherField = $urls{$u};
 my @otherNines = $otherField->subfield("9");
 my $otherZ = $otherField->subfield("z");
 my $other7 = $otherField->subfield("7");
 if(!$otherZ)
 {
 if($z)
 {
 $otherField->add_subfields('z'=>$z);
 }
 }
 if(!$other7)
 {
 if($s7)
 {
 $otherField->add_subfields('7'=>$s7);
 }
 }
 if($conf{"merge_9s"})
 {
 # Add each $9 the stored field doesn't already carry (ind2='0' only).
 foreach(@nines)
 {
 my $looking = $_;
 my $found = 0;
 foreach(@otherNines)
 {
 if($looking eq $_)
 {
 $found=1;
 }
 }
 if($found==0 && $ind2 eq '0')
 {
 $otherField->add_subfields('9' => $looking);
 }
 }
 }
 if($ind2 ne '0')
 {
 $thisField->delete_subfields('9');
 $thisField->delete_subfields('z');
 }

 $urls{$u} = $otherField;
 }
 }

 }

 my $finalCount = scalar keys %urls;
 if($original856 != $finalCount)
 {
 logfile_addLine($log,"There was $original856 and now there are $finalCount");
 }

 # Replace $marc's 856s with the merged set (order not preserved; %urls is a hash).
 my $dump1=Dumper(\%urls);
 my @remove = $marc->field('856');
 #logfile_addLine($log,"Removing ".$#remove." 856 records");
 $marc->delete_fields(@remove);

 while ((my $internal, my $mvalue ) = each(%urls))
 {
 $marc->insert_grouped_field( $mvalue );
 }
 return $marc;
}
+
sub getEvergreenMax
{
 # Return the highest biblio.record_entry id, optionally restricted to ids
 # greater than $lastMax and/or rows whose tcn_source equals $sha1. Used to
 # detect the id of a freshly inserted bib. Returns 0 when nothing matches
 # (MAX over no rows yields NULL -> undef -> the initialized 0 stands... or
 # the undef is assigned; callers treat falsy as "not found").
 my $sha1 = shift;
 my $lastMax = shift;
 my $query = "SELECT MAX(id) FROM biblio.record_entry";

 # Assemble the WHERE clause from whichever filters were supplied.
 $query .= " WHERE" if($sha1 || $lastMax);
 $query .= " id > $lastMax" if($lastMax);
 $query .= " AND " if($sha1 && $lastMax);
 $query .= " tcn_source = \$tcn_source\$$sha1\$tcn_source\$" if($sha1);

 updateJob("Processing", $query) if($debug);
 logfile_addLine($log,$query) if($debug);

 my @results = @{dbhandler_query($query)};
 my $dbmax = 0;
 foreach(@results)
 {
 my $row = $_;
 my @row = @{$row};
 $dbmax = @row[0];
 }
 return $dbmax;
}
+
sub convertMARCtoXML
{
 # Serialize a MARC::Record to the single-line, entity-encoded <record> XML
 # form that Evergreen stores in biblio.record_entry.marc.
 my $marc = @_[0];
 my $thisXML = $marc->as_xml(); #decode_utf8();

 #this code is borrowed from marc2bre.pl
 # Flatten to one line, drop the XML declaration and inter-tag whitespace,
 # strip control characters, then entity-encode via OpenILS.
 $thisXML =~ s/\n//sog;
 $thisXML =~ s/^<\?xml.+\?\s*>//go;
 $thisXML =~ s/>\s+</></go;
 $thisXML =~ s/\p{Cc}//go;
 $thisXML = OpenILS::Application::AppUtils->entityize($thisXML);
 $thisXML =~ s/[\x00-\x1f]//go;
 $thisXML =~ s/^\s+//;
 $thisXML =~ s/\s+$//;
 # as_xml() wraps the record in <collection>; rewrite that wrapper so the
 # stored document is rooted at <record>.
 $thisXML =~ s/<record><leader>/<leader>/;
 $thisXML =~ s/<collection/<record/;
 $thisXML =~ s/<\/record><\/collection>/<\/record>/;

 #end code
 return $thisXML;
}
+
sub getbibsource
{
    # Resolve the configured import source name ($importSourceName) to its
    # config.bib_source id. On first use (and when not running import_as_is)
    # the source row is created with quality 90. Falls back to id 1 when no
    # row can be found at all.
    my $sourceQuery = "SELECT ID FROM CONFIG.BIB_SOURCE WHERE SOURCE = \$\$$importSourceName\$\$";
    my @rows = @{dbhandler_query($sourceQuery)};

    if(!@rows && !$conf{'import_as_is'})
    {
        print "Didnt find '$importSourceName' in bib_source, now creating it...\n";
        my $insert = "INSERT INTO CONFIG.BIB_SOURCE(QUALITY,SOURCE) VALUES(90,\$\$$importSourceName\$\$)";
        my $outcome = dbhandler_update($insert);
        print "Update results: $outcome\n";
        @rows = @{dbhandler_query($sourceQuery)};
    }

    # Return the id from the first row, if any.
    foreach my $row (@rows)
    {
        return $row->[0];
    }
    return 1;
}
+
sub createNewJob
{
    # Insert a new bib_magic.job row with the given status and return its id
    # (also stored in the global $jobid). Returns -1 on failure.
    my $status = $_[0];

    # Fix: the status used to be interpolated directly into the SQL inside
    # single quotes, which broke (and was injectable) for any status text
    # containing a quote. Use a bind parameter like the rest of the file.
    my $query = "INSERT INTO bib_magic.job(status) values(\$1)";
    my @values = ($status);
    my $results = dbhandler_updateWithParameters($query,\@values);
    if($results)
    {
        # NOTE(review): max(ID) is racy if two jobs could be created
        # concurrently — confirm this script runs single-instance.
        $query = "SELECT max( ID ) FROM bib_magic.job";
        foreach my $row (@{dbhandler_query($query)})
        {
            $jobid = $row->[0];
            return $jobid;
        }
    }
    return -1;
}
+
sub updateJob
{
    # Record progress on the current job row ($jobid): bump the action counter,
    # stamp the update time, and store the status and action text.
    # Returns the dbhandler result.
    my ($status, $action) = @_;

    logfile_addLine($log, $action) if $debug;

    my $query = "UPDATE bib_magic.job SET last_update_time=now(),status=\$1, CURRENT_ACTION_NUM = CURRENT_ACTION_NUM+1,current_action=\$2 where id=\$3";
    my @parameters = ($status, $action, $jobid);
    return dbhandler_updateWithParameters($query, \@parameters);
}
+
## Leaving this function for debugging purposes. It's not called anywhere but could be useful
sub findMatchInArchive
{
    # Scan every MARC (USMARC) file under $archivefolder for a record whose
    # formatted text matches any regex pattern in the supplied list.
    # Returns an arrayref holding the first matching filename, or an empty
    # arrayref when nothing matched.
    # Fix: previously the sub fell off the end with no return statement when
    # no match was found, yielding an unrelated value from the final loop.
    my @matchList = @{$_[0]};
    my @files;

    #Get all files in the directory path
    @files = @{dirtrav(\@files,$archivefolder)};
    my @ret;

    for my $b (0..$#files)
    {
        # NOTE(review): ->in() returns undef for unreadable/non-MARC paths
        # (dirtrav also returns directories) — next() would then die. Fine for
        # a debug helper, but confirm before wiring this into the main flow.
        my $file = MARC::File::USMARC->in($files[$b]);
        while ( my $marc = $file->next() )
        {
            my $t = $marc->leader();
            my $su = substr($marc->leader(),6,1);
            print "Leader:\n$t\n$su\n";
            if(1)#$su eq 'a')
            {
                my $all = $marc->as_formatted();
                foreach(@matchList)
                {
                    if($all =~ m/$_/g)
                    {
                        my @booya = ($files[$b]);
                        push(@ret,$files[$b]);
                        print "This one: ".$files[$b]." matched '$_'\n";
                        return \@booya;
                    }
                }
            }
        }
        $file->close();
        undef $file;
    }
    return \@ret;    # explicit no-match return (was missing)
}
+
sub dirtrav
{
    # Recursively collect every file AND directory path under $pwd, appended
    # to the caller-supplied accumulator.
    # $_[0]: arrayref of paths collected so far. $_[1]: directory to walk.
    # Returns an arrayref of the combined list. Dies if $pwd cannot be opened.
    my @files = @{$_[0]};
    my $pwd = $_[1];

    # Fix: use a lexical dirhandle instead of the global bareword DIR.
    opendir(my $dh, $pwd) or die "Cannot open $pwd\n";
    my @entries = readdir($dh);
    closedir($dh);

    foreach my $entry (@entries)
    {
        next if $entry eq '.' || $entry eq '..';
        my $path = "$pwd/$entry";
        if (-d $path)
        {
            # Record the directory itself, then descend into it.
            push(@files, $path);
            @files = @{ dirtrav(\@files, $path) };
        }
        elsif (-f $path)
        {
            push(@files, $path);
        }
        # Other entry types (symlinked specials, sockets, ...) are skipped.
    }
    return \@files;
}
+
sub fixLeader
{
 # Intended to flip leader position 06 from 'a' to 'm'; currently a no-op.
 # The early return below is deliberate (see the comment) — the dead code
 # after it is kept on purpose in case the behavior is wanted again.
 my $marc = @_[0];
 #This is disabled because Shon did not want to change the icon in the catalog
 return $marc;
 my $fullLeader = $marc->leader();
 if(substr($fullLeader,6,1) eq 'a')
 {
 #print "Leader has an a:\n$fullLeader";
 $fullLeader = substr($fullLeader,0,6).'m'.substr($fullLeader,7);
 $marc->leader($fullLeader);
 my $fullLeader = $marc->leader();
 #print "Changed to:\n$fullLeader";
 }
 return $marc;
}
+
sub scoreMARC
{
    # Compute a richness score for a MARC record; higher means a fuller
    # record. Each rule is [metric_type, weight, cap, tags...] where type
    # 0 = field count, 1 = first-field length, 2 = subfield count (see score()).
    my $record = shift;

    my @rules = (
        [2, 100, 400, '245'],
        [1, 1,   150, '100'],
        [1, 1.1, 150, '110'],
        [0, 50,  200, '6..'],
        [0, 50,  100, '02.'],

        [0, 100, 200, '246'],
        [0, 100, 100, '130'],
        [0, 100, 100, '010'],
        [0, 100, 200, '490'],
        [0, 10,  50,  '830'],

        [1, .5,  50,  '300'],
        [0, 1,   100, '7..'],
        [2, 2,   100, '50.'],
        [2, 2,   100, '52.'],

        [2, .5,  200, '51.', '53.', '54.', '55.', '56.', '57.', '58.'],
    );

    my $total = 0;
    $total += score($record, @{$_}) foreach @rules;
    return $total;
}
+
sub score
{
    # Score one facet of a MARC record, then weight and cap it.
    # $type selects the metric: 0 = field count, 1 = length of the first
    # field, 2 = subfield count; any other type scores 0.
    # Result is multiplied by $weight, clamped to $cap, and truncated to int.
    my ($record, $type, $weight, $cap, @tags) = @_;

    my %metric = (
        0 => \&count_field,
        1 => \&field_length,
        2 => \&count_subfield,
    );

    my $raw = 0;
    $raw = $metric{$type}->($record, \@tags) if exists $metric{$type};

    my $weighted = $raw * $weight;
    $weighted = $cap if $weighted > $cap;
    return int($weighted);
}
+
sub count_subfield
{
    # Sum of subfield counts across every occurrence of every tag in @$tags.
    # $_[0]: MARC::Record (or anything with ->field returning field objects).
    # $_[1]: arrayref of tag patterns.
    my ($record, $tags) = @_;
    my $total = 0;

    foreach my $tag (@{$tags})
    {
        foreach my $field ($record->field($tag))
        {
            my @subfields = $field->subfields();
            $total += scalar(@subfields) if @subfields;
        }
    }
    return $total;
}
+
sub count_field
{
    # Total number of occurrences of every tag pattern in @$tags on the record.
    # $_[0]: MARC::Record-like object. $_[1]: arrayref of tag patterns.
    my ($record, $tags) = @_;
    my $count = 0;

    foreach my $tag (@{$tags})
    {
        my @occurrences = $record->field($tag);
        $count += scalar(@occurrences);
    }
    return $count;
}
+
sub field_length
{
    # String length of the FIRST occurrence of the FIRST tag in @$tags,
    # or 0 when the record has no such field. Later tags are ignored.
    my ($record, $tags) = @_;
    my @tagList = @{$tags};

    my @occurrences = $record->field($tagList[0]);
    return 0 unless @occurrences;
    return length( $occurrences[0]->as_string );
}
+
sub updateDBSHA1
{
    # Upsert the sha1 fingerprints for a bib in bib_magic.bib_sha1.
    # $_[0]: sha1 string "full mid" (space separated; mid may be absent).
    # $_[1]: bib id. $_[2]: bib source id (used only on insert).
    my ($sha1, $bibid, $source) = @_;
    my @shas = split(/\s/, $sha1);

    # Does a fingerprint row already exist for this bib?
    my $count = 0;
    my $query = "select count(*) from bib_magic.bib_sha1 where bib=$bibid";
    foreach my $row (@{dbhandler_query($query)})
    {
        $count = $row->[0];
    }

    my @values;
    if($count > 0)
    {
        $query = "update bib_magic.bib_sha1 set sha1_full = \$1 , sha1_mid = \$2 where bib = \$3";
        @values = ($shas[0], $shas[1], $bibid);
    }
    else
    {
        $query = "insert into bib_magic.bib_sha1(bib,bib_source,sha1_full,sha1_mid) values( \$1, \$2, \$3, \$4 )";
        @values = ($bibid, $source, $shas[0], $shas[1]);
    }
    # Fix: updateJob takes (status, action); this used to be called with only
    # the query, which landed the SQL text in the status column and left the
    # action undef. Match the calling convention used everywhere else.
    updateJob("Processing", $query);
    dbhandler_updateWithParameters($query,\@values);

}
+
sub parseMARCEdits
{
 # Scan the loaded %conf for MARC-manipulation definitions (keys prefixed
 # marc_edit_control_* or marc_edit_standard_*), validate each via
 # parseMARCEdits_sanity_check, and file them into the global %marcEdits
 # keyed by edit type. Exits the program on any invalid definition.

 my %confGroups = ('control' => 'marc_edit_control_', 'standard' => 'marc_edit_standard_');
 my $test = 1;
 my $count = 0;

 while (( my $internal, my $mvalue ) = each(%conf))
 {
 while (( my $gtype, my $groupID ) = each(%confGroups))
 {
 if( $internal =~ m/$groupID/g)
 {
 $count++;
 # Sanity check returns a hashref (the parsed edit) or 0 on failure.
 $test = parseMARCEdits_sanity_check($gtype, $mvalue);
 print "\n\nThere is an error in the MARC Manipulation definitions. Please see log for details:\n\nless ". $conf{"logfile"} ."\n\n" unless $test;
 exit unless $test;
 my %tt = %{$test};
 # NOTE(review): this dereferences $marcEdits{type} unconditionally —
 # it dies under strict refs if %marcEdits has no entry for this type
 # yet; presumably %marcEdits is pre-seeded elsewhere — confirm.
 my @ar = @{$marcEdits{$tt{'type'}}};
 push (@ar, \%tt);
 $marcEdits{$tt{'type'}} = \@ar;
 logfile_addLine($log,Dumper(\%marcEdits));
 }
 }
 }
}
+
+# Validate one MARC-edit definition string from the config file.
+# $gtype: 'control' or 'standard' (which conf group it came from)
+# $value: the raw definition text, expected to be Perl hash-literal
+#         syntax, e.g. type => 'add', def => [...]
+# Returns: hashref of the parsed definition on success, 0 on any failure
+# (failures are logged).  Callers test truthiness before dereferencing.
+sub parseMARCEdits_sanity_check
+{
+ my $gtype = shift;
+ my $value = shift;
+ my @allowedTypes = ('add','remove','removesubfield','replace');
+ my %typeExpectedArraySize = ('standard' => 5, 'control' => 3 ); # one based
+ logfile_addLogLine($log,"Attempting to parse '$value'");
+ my %check = ();
+ # NOTE(review): string eval of config-file text -- the config is
+ # trusted operator input here, but anything that can write the config
+ # can execute arbitrary Perl.  Flagging, not changing.
+ my $exec = '%check = (' . $value . ');';
+
+ local $@;
+ eval($exec);
+
+ logfile_addLogLine($log,"Failed to parse '$value'") if $@;
+ return 0 if $@;
+
+ logfile_addLine($log,Dumper(\%check)) if $debug;
+
+ # Both 'type' and 'def' keys are mandatory.
+ if(!$check{"type"})
+ {
+
+ logfile_addLogLine($log,"Type Undefined '$value'");
+ return 0;
+ }
+
+ if(!$check{"def"})
+ {
+
+ logfile_addLogLine($log,"def Undefined '$value'");
+ return 0;
+ }
+
+ # Check type values
+ my $allowedTypeExists = 0;
+ foreach(@allowedTypes)
+ {
+ $allowedTypeExists = 1 if $_ eq $check{'type'};
+ }
+ logfile_addLogLine($log,"Invalid type '". $check{'type'} ."'") if !$allowedTypeExists;
+ return 0 if !$allowedTypeExists;
+
+ my @def = @{$check{'def'}};
+
+ # Arity check: 'removesubfield' takes exactly [field, subfield];
+ # 'remove' is variable-length; everything else must match the size
+ # expected for its group (5 for standard, 3 for control).
+ if($check{'type'} eq 'removesubfield')
+ {
+ my $totalArray = scalar @def;
+ logfile_addLogLine($log,"Incorrect number of array values (expecting: 2)") if $totalArray != 2;
+ return 0 if $totalArray != 2;
+ }
+ elsif($check{'type'} ne 'remove')
+ {
+ my $totalArray = scalar @def;
+ logfile_addLogLine($log,"Incorrect number of array values (expecting: ".$typeExpectedArraySize{$gtype} .")") if $totalArray != $typeExpectedArraySize{$gtype};
+ return 0 if $totalArray != $typeExpectedArraySize{$gtype};
+ }
+
+ # The first def entry is always a field spec; testField returns 0
+ # (falsy) on failure, which is propagated to the caller.
+ my $fieldTest = testField(@def[0], $gtype, $check{'type'});
+ return $fieldTest unless $fieldTest;
+
+ # For 'remove', every def entry is a field spec.
+ if($check{'type'} eq 'remove')
+ {
+ foreach(@def)
+ {
+ $fieldTest = testField($_, $gtype, $check{'type'});
+ return $fieldTest unless $fieldTest;
+ }
+ }
+
+ # For 'removesubfield', def is [field, subfield].
+ if($check{'type'} eq 'removesubfield')
+ {
+ $fieldTest = testField(@def[0], $gtype, $check{'type'});
+ return $fieldTest unless $fieldTest;
+ $fieldTest = testSubfield(@def[1], $gtype);
+ return $fieldTest unless $fieldTest;
+ }
+
+ # The rest of the tests are for non-removals
+ if($check{'type'} ne 'remove' && $check{'type'} ne 'removesubfield')
+ {
+ # control field tests
+ # Control defs carry a numeric column position in def[1].
+ if($gtype eq 'control')
+ {
+ if(!(@def[1] =~ m/^\d+$/))
+ {
+ logfile_addLogLine($log,"Invalid MARC field def: '".@def[1]."' is not a valid column position");
+ return 0;
+ }
+ }
+ # Standard field tests
+ # Standard defs carry ind1, ind2 and a subfield code in def[1..3].
+ else
+ {
+ $fieldTest = testIndicator(@def[1], $gtype);
+ return $fieldTest unless $fieldTest;
+ $fieldTest = testIndicator(@def[2], $gtype);
+ return $fieldTest unless $fieldTest;
+ $fieldTest = testSubfield(@def[3], $gtype);
+ return $fieldTest unless $fieldTest;
+ }
+ }
+ return \%check;
+}
+
+# Validate a MARC subfield code: exactly one alphanumeric character.
+# Logs and returns 0 on failure; returns 1 when valid.
+sub testSubfield
+{
+    my $subfield = shift;
+    return 1 if $subfield =~ m/^[\dA-Za-z]$/;
+    logfile_addLogLine($log,"Invalid MARC subfield def: ".$subfield);
+    return 0;
+}
+
+# Validate a MARC indicator definition.  Accepted: undef/empty (no
+# indicator), the literal 'same' (controlled vocab for "no change"),
+# or a single digit.  Logs and returns 0 otherwise.
+sub testIndicator
+{
+    my $ind = shift;
+    return 1 unless $ind;
+    return 1 if $ind eq 'same';
+    return 1 if $ind =~ m/^\d$/;
+    logfile_addLogLine($log,"Invalid MARC indicator def: ".$ind);
+    return 0;
+}
+
+# Validate a MARC field definition of the form 'xxx[_ind1[_ind2[_sub]]]'.
+# $field: the definition string; $gtype: 'control' or 'standard';
+# $rtype: the edit type (e.g. 'remove').
+# Returns 1 when valid; logs the problem and returns 0 otherwise.
+sub testField
+{
+    my ($field, $gtype, $rtype) = @_;
+
+    my ($tag, $ind1, $ind2, $subfield) = split(/_/, mobutil_trim($field));
+    $field = $tag;
+
+    # Tag must be exactly three digits.
+    unless($field =~ m/^\d\d\d$/)
+    {
+        logfile_addLogLine($log,"Invalid MARC field def: ".$field);
+        return 0;
+    }
+
+    # A removal that names indicators must also name the subfield.
+    if($rtype eq 'remove' && $ind1 && !$subfield)
+    {
+        logfile_addLogLine($log,"Tag removal requires both indicators and subfield to be defined like this: 'xxx_ind1_ind2_subfield'");
+        return 0;
+    }
+
+    # Only removals may carry a subfield in the field spec itself.
+    if($rtype ne 'remove' && $subfield)
+    {
+        logfile_addLogLine($log,"Subfield definition not allowed unless 'remove' is specified subfield def: '$subfield' defined with field '$field'");
+        return 0;
+    }
+
+    # Each optional piece must be one character, 'all', or 'none'.
+    foreach my $piece ($ind1, $ind2, $subfield)
+    {
+        next unless $piece;
+        next if $piece eq 'all' || $piece eq 'none' || length($piece) == 1;
+        logfile_addLogLine($log,"Invalid MARC subfield def: '$field' '$ind1' '$ind2' '$subfield'");
+        return 0;
+    }
+
+    my $tagNum = $field + 0;
+
+    # Control fields are 000-009; standard fields are 010-999.
+    if($tagNum > 9 && $gtype eq 'control')
+    {
+        logfile_addLogLine($log,"Invalid MARC field def: '$field' is not a control field");
+        return 0;
+    }
+    if($tagNum < 10 && $gtype eq 'standard')
+    {
+        logfile_addLogLine($log,"Invalid MARC field def: '$field' is not a standard field");
+        return 0;
+    }
+    if($tagNum > 999)
+    {
+        logfile_addLogLine($log,"Invalid MARC field def: '$field' is out of range");
+        return 0;
+    }
+    return 1;
+}
+
+# Render the elapsed time from $dt (a DateTime) until now as
+# "D days, H hours, M minutes and S seconds".
+sub calculateTimeDifference
+{
+    my $dt = shift;
+    my $now = DateTime->now(time_zone => "local");
+    my $difference = $now - $dt;
+    # '%M %S' formats the duration as total minutes and seconds.
+    my $format = DateTime::Format::Duration->new(pattern => '%M %S');
+    my ($min, $sec) = split(/\s/, $format->format_duration($difference));
+    my $days = 0;
+    my $hours = 0;
+    # BUG FIX: was '> 60' / '> 24', which left exactly 60 minutes
+    # rendered as "0 hours, 60 minutes" and 24 hours as "24 hours".
+    if($min >= 60)
+    {
+        $hours = floor($min / 60);
+        $min = $min % 60;
+        if ($hours >= 24)
+        {
+            $days = floor($hours / 24);
+            $hours = $hours % 24;
+        }
+    }
+    return "$days days, $hours hours, $min minutes and $sec seconds";
+}
+
+# Trigger an Evergreen reingest for one bib: rebuild its metabib field
+# entries (facet/browse/search/display) and its record attributes.
+# $_[0]: bib record id.  Both statements run via parameterized queries.
+sub reingest
+{
+ my $bibid = shift;
+ my $query =
+ "SELECT metabib.reingest_metabib_field_entries(bib_id := \$1, skip_facet := FALSE, skip_browse := FALSE, skip_search := FALSE, skip_display := FALSE)";
+ my @vals = ($bibid);
+ dbhandler_updateWithParameters($query, \@vals);
+
+ $query = "SELECT metabib.reingest_record_attributes(rid := id, prmarc := marc)
+ FROM biblio.record_entry
+ WHERE id = \$1
+ ";
+ dbhandler_updateWithParameters($query, \@vals);
+}
+
+# Append a timestamped line ("yyyy-mm-dd hh:mm:ss : message") to $file
+# as UTF-8.  Returns 1 on success, 0 if the file could not be opened.
+sub logfile_addLogLine
+{
+    my $file = shift;
+    my $line = shift;
+
+    my $dt = DateTime->now(time_zone => "local");
+    # 'yyyy-mm-dd hh:mm:ss', padded to a fixed 20-char column.
+    my $datetime = mobutil_makeEvenWidth($dt->ymd . " " . $dt->hms, 20);
+
+    # Three-arg open with a lexical handle (was a 2-arg bareword open);
+    # bail out early instead of printing to an unopened handle.
+    open(my $out, '>>', $file) or return 0;
+    binmode($out, ":utf8");
+    print $out $datetime,": $line\n";
+    close($out);
+    return 1;
+}
+
+# Append $line (plus a newline) to $file as UTF-8.
+# Returns 1 on success, 0 if the file could not be opened.
+sub logfile_addLine
+{
+    my $file = shift;
+    my $line = shift;
+    # Three-arg open with a lexical handle (was a 2-arg bareword open
+    # that still printed to the unopened handle after a failure).
+    open(my $out, '>>', $file) or return 0;
+    binmode($out, ":utf8");
+    print $out "$line\n";
+    close($out);
+    return 1;
+}
+
+# Overwrite $file so it contains only $line (plus a newline), UTF-8.
+# Returns 1 on success, 0 if the file could not be opened.
+sub logfile_truncFile
+{
+    my $file = shift;
+    my $line = shift;
+    # Three-arg open ('>' truncates) with a lexical handle, replacing
+    # the 2-arg bareword open; fail fast instead of printing blind.
+    open(my $out, '>', $file) or return 0;
+    binmode($out, ":utf8");
+    print $out "$line\n";
+    close($out);
+    return 1;
+}
+
+
+# Read $file (UTF-8) and return a reference to an array of its lines.
+# Retries the open up to 100 times, one second apart, to ride out
+# transient contention.  Returns a ref to an empty array when the file
+# is missing or never becomes readable.
+sub logfile_readFile
+{
+    my $file = shift;
+    my @lines;
+
+    if( ! -e $file )
+    {
+        print "File does not exist: $file\n";
+        return \@lines;
+    }
+
+    # BUG FIX: the old code opened the file once, then opened it AGAIN
+    # inside the retry loop (leaking the first handle) and closed the
+    # handle twice.  One lexical handle, one open per attempt.
+    my $trys = 0;
+    my $fh;
+    while( !open($fh, '<', $file) && $trys < 100 )
+    {
+        print "Trying again attempt $trys\n";
+        $trys++;
+        sleep(1);
+    }
+    if($trys < 100)
+    {
+        binmode($fh, ":utf8");
+        @lines = <$fh>;
+        close($fh);
+    }
+    else
+    {
+        print "Attempted $trys times. COULD NOT READ FILE: $file\n";
+    }
+    return \@lines;
+}
+
+# Copy $file to $destination via File::Copy.
+# Returns copy()'s status: 1 on success, 0 on failure.
+sub logfile_copyFile
+{
+    my ($file, $destination) = @_;
+    return copy($file, $destination);
+}
+
+# Delete $file if it exists.  Returns 1 when the file is gone afterward
+# (deleted successfully, or it never existed); 0 when unlink failed.
+sub logfile_deleteFile
+{
+    my $file = shift;
+    return 1 unless -e $file;
+    return unlink($file) ? 1 : 0;
+}
+
+
+# Parse a key=value config file: blank lines and '#' comment lines are
+# skipped; keys and values are whitespace-trimmed; values may contain
+# '=' (only the first '=' splits).  Returns a hashref of settings, or
+# 0 when the file does not exist.
+sub mobutil_readConfFile
+{
+    my $file = shift;
+    my %settings;
+
+    if( !( -e $file ) )
+    {
+        print "Config File does not exist\n";
+        return 0;
+    }
+
+    foreach my $line (@{ logfile_readFile($file) })
+    {
+        $line =~ s/\n//; #remove newline characters
+        my $cur = mobutil_trim($line);
+        next unless length($cur) > 0;
+        next if substr($cur, 0, 1) eq "#";
+        my ($key, @rest) = split(/=/, $cur);
+        $settings{ mobutil_trim($key) } = mobutil_trim(join('=', @rest));
+    }
+
+    return \%settings;
+}
+
+# Force $line to exactly $width characters: truncate when too long,
+# right-pad with spaces when too short.
+sub mobutil_makeEvenWidth #line, width
+{
+    my ($line, $width) = @_;
+    return substr($line, 0, $width) if length($line) >= $width;
+    # %-*s left-justifies and space-pads to the given width.
+    return sprintf("%-*s", $width, $line);
+}
+
+# Strip leading and trailing whitespace from $string and return it.
+sub mobutil_trim
+{
+    my $string = shift;
+    $string =~ s/\A\s+|\s+\z//g;
+    return $string;
+}
+
+# Build a non-clobbering output path: <path>/<seed>.<ext> if free,
+# otherwise <path>/<seed>0.<ext>, <seed>1.<ext>, ... for the first
+# unused integer.  Returns 0 when $path is not a directory.
+sub mobutil_chooseNewFileName #path to output folder,file prefix, file extention returns full path to new file name
+{
+    my ($path, $seed, $ext) = @_;
+    # Normalize to a trailing slash.
+    $path .= '/' unless substr($path, -1, 1) eq '/';
+    return 0 unless -d $path;
+
+    my $candidate = $path . $seed . '.' . $ext;
+    my $num = 0;
+    while( -e $candidate )
+    {
+        $candidate = $path . $seed . $num . '.' . $ext;
+        $num++;
+    }
+    return $candidate;
+}
+
+# Overlay $data onto $ret starting at 1-based column $column.
+# If $ret is shorter than the target column it is space-padded first;
+# characters under the overlay are replaced and any characters beyond
+# the overlay are preserved.  Returns the modified string.
+sub mobutil_insertDataIntoColumn #1 based column position
+{
+    my $ret = shift;
+    my $data = shift;
+    my $column = shift;
+    my $pos = $column - 1;
+
+    # Pad out to the insertion point when the string is too short
+    # (replaces the old char-by-char while loop).
+    $ret .= " " x ($pos - length($ret)) if length($ret) < $pos;
+
+    # Four-arg substr replaces exactly length($data) characters and
+    # extends the string when the overlay runs past the end.  The old
+    # code used @array[$i] single-element slices and joined undef
+    # slots, spraying warnings; this is warning-free and equivalent.
+    substr($ret, $pos, length($data)) = $data;
+    return $ret;
+}
+
+
+# Open the shared PostgreSQL connection (stored in the global
+# $dbHandler) with AutoCommit enabled.  Returns the DBI handle.
+sub dbhandler_setupConnection
+{
+    my ($dbname, $host, $login, $pass, $port) = @_;
+    my $dsn = "DBI:Pg:dbname=$dbname;host=$host;port=$port";
+    $dbHandler = DBI->connect($dsn, $login, $pass, { AutoCommit => 1});
+}
+
+
+# Execute a non-SELECT statement on the shared handle.
+# Returns DBI do()'s result (row count or undef on error).
+sub dbhandler_update
+{
+    my $sql = shift;
+    return $dbHandler->do($sql);
+}
+
+# Execute a parameterized statement.  Any bound value equal to the
+# string 'null' (case-insensitive) is bound as SQL NULL.
+# $_[0]: SQL with $1..$n placeholders; $_[1]: arrayref of values.
+# Returns the execute() result.
+sub dbhandler_updateWithParameters
+{
+    my $querystring = $_[0];
+    my @values = @{$_[1]};
+    my $q = $dbHandler->prepare($querystring);
+    my $i = 1;
+    foreach my $param (@values)
+    {
+        # BUG FIX: was lc($param eq 'null'), which lower-cased the
+        # BOOLEAN result of eq -- it matched only literal lowercase
+        # 'null' by accident and never 'NULL'/'Null'.  The defined
+        # guard avoids an uninitialized-value warning as well.
+        $param = undef if defined $param && lc($param) eq 'null';
+        $q->bind_param($i, $param);
+        $i++;
+    }
+    my $ret = $q->execute();
+    return $ret;
+}
+
+# Run a SELECT on the shared handle and return a reference to an array
+# of row arrayrefs (each row copied out of DBI's reused buffer).
+sub dbhandler_query
+{
+    my $querystring = shift;
+    my $sth = $dbHandler->prepare($querystring);
+    $sth->execute();
+
+    my @rows;
+    while (my $row = $sth->fetchrow_arrayref())
+    {
+        # fetchrow_arrayref reuses its buffer, so copy each row.
+        push(@rows, [@$row]);
+    }
+    return \@rows;
+}
+
+# Build the email configuration hashref consumed by email_send.
+# Args: from address, arrayref of extra recipients, notifyError flag,
+# notifySuccess flag, hashref of conf values whose 'successemaillist'
+# and 'erroremaillist' entries are comma-separated address strings.
+# Returns the configuration hashref.
+sub email_setup
+{
+    # List unpack replaces the old @_[0]..@_[4] single-element slices;
+    # the \@{ @_[1] } / \%{@_[4]} re-ref dance was a no-op and is gone.
+    my ($from, $recipients, $notifyError, $notifySuccess, $confRef) = @_;
+
+    my $email =
+    {
+        fromEmailAddress => $from,
+        emailRecipientArray => $recipients,
+        notifyError => $notifyError, #true/false
+        notifySuccess => $notifySuccess, #true/false
+        confArray => $confRef,
+        errorEmailList => [],
+        successEmailList => []
+    };
+
+    # Split the comma-separated lists and trim each address.
+    my @success = map { mobutil_trim($_) } split(/,/, $confRef->{"successemaillist"});
+    $email->{successEmailList} = \@success;
+
+    my @error = map { mobutil_trim($_) } split(/,/, $confRef->{"erroremaillist"});
+    $email->{errorEmailList} = \@error;
+
+    return $email;
+}
+
+# Assemble and send a plain-text email.  Recipients are the configured
+# error and/or success lists (gated by the notify flags) plus any extra
+# recipients supplied at setup time.
+sub email_send #subject, body
+{
+    my ($email, $subject, $body) = @_;
+
+    # Header list is flat key/value pairs for Email::MIME::create.
+    my @headers = ("From", $email->{fromEmailAddress});
+
+    if($email->{'notifyError'})
+    {
+        push(@headers, "To", $_) for @{$email->{errorEmailList}};
+    }
+    if($email->{'notifySuccess'})
+    {
+        push(@headers, "To", $_) for @{$email->{successEmailList}};
+    }
+    push(@headers, "To", $_) for @{$email->{emailRecipientArray}};
+    push(@headers, "Subject", $subject);
+
+    my $message = Email::MIME->create(
+        header_str => [ @headers ],
+        attributes => {
+            encoding => 'quoted-printable',
+            charset => 'ISO-8859-1',
+        },
+        body_str => "$body\n");
+
+    use Email::Sender::Simple qw(sendmail);
+    sendmail($message);
+}
+
+# One-time bootstrap: if the bib_magic schema does not exist yet,
+# create it along with all of its tables and indexes.  Idempotent at
+# the schema level only -- it checks for the schema, not individual
+# tables, so a partially-created schema is not repaired.
+sub setupSchema
+{
+ my $query = "SELECT schema_name FROM information_schema.schemata WHERE schema_name = 'bib_magic'";
+ my @results = @{dbhandler_query($query)};
+ if($#results==-1)
+ {
+ $query = "CREATE SCHEMA bib_magic";
+ dbhandler_update($query);
+
+ # job: one row per run of this script, tracking progress/status.
+ $query = "CREATE TABLE bib_magic.job
+ (
+ id bigserial NOT NULL,
+ start_time timestamp with time zone NOT NULL DEFAULT now(),
+ last_update_time timestamp with time zone NOT NULL DEFAULT now(),
+ status text default 'processing',
+ current_action text,
+ current_action_num bigint default 0,
+ CONSTRAINT job_pkey PRIMARY KEY (id)
+ )";
+ dbhandler_update($query);
+
+ # import_status: one row per incoming record, tied to its job.
+ $query = "CREATE TABLE bib_magic.import_status(
+ id bigserial NOT NULL,
+ bibtag text,
+ filename text,
+ z01 text,
+ title text,
+ sha1 text,
+ type text default \$\$import\$\$,
+ status text default \$\$new\$\$,
+ processed boolean default false,
+ row_change_time timestamp default now(),
+ marc_xml text,
+ bib bigint,
+ job bigint NOT NULL,
+ CONSTRAINT import_status_pkey PRIMARY KEY (id),
+ CONSTRAINT import_status_fkey FOREIGN KEY (job)
+ REFERENCES bib_magic.job (id) MATCH SIMPLE)";
+ dbhandler_update($query);
+
+ # item_reassignment: audit trail of copies moved between bibs.
+ $query = "CREATE TABLE bib_magic.item_reassignment(
+ id serial,
+ copy bigint,
+ prev_bib bigint,
+ target_bib bigint,
+ statusid bigint,
+ change_time timestamp default now(),
+ job bigint NOT NULL,
+ CONSTRAINT item_reassignment_fkey FOREIGN KEY (job)
+ REFERENCES bib_magic.job (id) MATCH SIMPLE,
+ CONSTRAINT item_reassignment_statusid_fkey FOREIGN KEY (statusid)
+ REFERENCES bib_magic.import_status (id) MATCH SIMPLE
+ )";
+ dbhandler_update($query);
+
+ # bib_marc_update: before/after MARC snapshots for changed bibs.
+ $query = "CREATE TABLE bib_magic.bib_marc_update(
+ id bigserial NOT NULL,
+ record bigint,
+ prev_marc text,
+ changed_marc text,
+ new_record boolean NOT NULL DEFAULT false,
+ change_time timestamp default now(),
+ job bigint NOT NULL,
+ CONSTRAINT bib_marc_update_fkey FOREIGN KEY (job)
+ REFERENCES bib_magic.job (id) MATCH SIMPLE)";
+ dbhandler_update($query);
+
+ # bib_merge: audit trail of bib merges (lead/sub pairs).
+ $query = "CREATE TABLE bib_magic.bib_merge(
+ id bigserial NOT NULL,
+ leadbib bigint,
+ subbib bigint,
+ statusid bigint,
+ change_time timestamp default now(),
+ job bigint NOT NULL,
+ CONSTRAINT bib_merge_fkey FOREIGN KEY (job)
+ REFERENCES bib_magic.job (id) MATCH SIMPLE,
+ CONSTRAINT bib_merge_statusid_fkey FOREIGN KEY (statusid)
+ REFERENCES bib_magic.import_status (id) MATCH SIMPLE)";
+ dbhandler_update($query);
+
+ # nine_sync: log of 9xx-field URL synchronization runs.
+ $query = "CREATE TABLE bib_magic.nine_sync(
+ id bigserial NOT NULL,
+ record bigint,
+ nines_synced text,
+ url text,
+ change_time timestamp default now())";
+ dbhandler_update($query);
+
+ # bib_sha1: per-bib full/mid hashes used for match detection.
+ $query = "CREATE TABLE bib_magic.bib_sha1(
+ bib bigint,
+ bib_source bigint,
+ sha1_full text,
+ sha1_mid text,
+ CONSTRAINT bib_sha1_bib_fkey FOREIGN KEY (bib)
+ REFERENCES biblio.record_entry (id) MATCH SIMPLE,
+ CONSTRAINT bib_sha1_bib_source_fkey FOREIGN KEY (bib_source)
+ REFERENCES config.bib_source (id) MATCH SIMPLE)";
+ dbhandler_update($query);
+
+ # Indexes supporting the hash-lookup and status queries below.
+ $query = "CREATE INDEX bib_magic_bib_sha1_full_idx
+ ON bib_magic.bib_sha1
+ USING btree (sha1_full)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_bib_sha1_mid_idx
+ ON bib_magic.bib_sha1
+ USING btree (sha1_mid)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_bib_sha1_bib_idx
+ ON bib_magic.bib_sha1
+ USING btree (bib)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_bib_sha1_bib_source_idx
+ ON bib_magic.bib_sha1
+ USING btree (bib_source)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_bib_sha1_sha1_full_bib_source_idx
+ ON bib_magic.bib_sha1
+ USING btree (sha1_full,bib_source)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_bib_sha1_sha1_mid_bib_source_idx
+ ON bib_magic.bib_sha1
+ USING btree (sha1_mid,bib_source)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_import_status_job_idx
+ ON bib_magic.import_status
+ USING btree (job)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_import_status_status_idx
+ ON bib_magic.import_status
+ USING btree (status)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_import_status_type_idx
+ ON bib_magic.import_status
+ USING btree (type)";
+ dbhandler_update($query);
+
+ $query = "CREATE INDEX bib_magic_import_status_bib_idx
+ ON bib_magic.import_status
+ USING btree (bib)";
+ dbhandler_update($query);
+ }
+}
+
+# Explicit end of script: everything above this point after the main
+# flow is subroutine definitions, so control never falls through.
+ exit;