use lib '/openils/lib/perl5/';
-#use OpenSRF::System;
-#use OpenILS::Utils::Fieldmapper;
-#use OpenSRF::Utils::SettingsClient;
-#
-#OpenSRF::System->bootstrap_client(config_file =>
-#'/openils/conf/bootstrap.conf');
-#Fieldmapper->import(IDL =>
-# OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-#
-# # do this after bootstrapping/importing IDL
-# require OpenILS::Application::Search;
-#
-# my $meth = OpenSRF::Application->method_lookup(
-# 'open-ils.search.biblio.metarecord.mods_slim.retrieve');
-# my @data = $meth->run(1);
-# my $mods = shift @data;
-# print "Got mvr: " . $mods->title . "\n";
-
-
-
use OpenSRF::System;
use OpenSRF::Application;
use OpenSRF::EX qw/:try/;
use Digest::MD5 qw/md5_hex/;
use JSON;
use Data::Dumper;
+use Unicode::Normalize;
use Time::HiRes qw/time/;
use Getopt::Long;
MARC::Charset->ignore_errors(1);
-my ($id_field, $count, $user, $password, $config, $keyfile, @files) =
+my ($id_field, $count, $user, $password, $config, $keyfile, @files, @trash_fields) =
('998', 1, 'admin', 'open-ils', '/openils/conf/bootstrap.conf');
GetOptions(
'keyfile=s' => \$keyfile,
'config=s' => \$config,
'file=s' => \@files,
+ 'trash=s' => \@trash_fields,
);
@files = @ARGV if (!@files);
my $starttime = time;
while ( my $rec = $batch->next ) {
- my $id = $rec->subfield($id_field => 'a') || $count;
+ my $id;
+ my $field = $rec->field($id_field);
+
+ if ($field) {
+ if ($field->is_control_field) {
+ $id = $field->data;
+ } else {
+ $id = $field->subfield('a');
+ }
+ } else {
+ $id = $count;
+ }
+
if ($id =~ /(\d+)/o) {
$id = $1;
}
if ($keyfile) {
if (my $tcn = $keymap{$id}) {
- $rec->delete_field( $_ ) for ($rec->field('035'));
- $rec->append_fields( MARC::Field->new( '035', '', '', 'a', $tcn ) );
+ $rec->delete_field( $_ ) for ($rec->field($id_field));
+ $rec->append_fields( MARC::Field->new( $id_field, '', '', 'a', $tcn ) );
} else {
$count++;
next;
}
}
- $rec = preprocess($rec, $id);
+ $rec = preprocess($rec);
if (!$rec) {
next;
}
- my $tcn_value = $rec->subfield('039' => 'a');
- my $tcn_source = $rec->subfield('039' => 'b');
+ my $tcn_value = $rec->subfield($id_field => 'a');
+ my $tcn_source = $rec->subfield($id_field => 'b');
(my $xml = $rec->as_xml_record()) =~ s/\n//sog;
$xml =~ s/^<\?xml.+\?\s*>//go;
$xml =~ s/>\s+</></go;
+ $xml =~ s/\p{Cc}//go;
+ $xml = entityize($xml);
my $bib = new Fieldmapper::biblio::record_entry;
$bib->id($id);
}
if (!$id) {
- my $f = $rec->field('035');
+ my $f = $rec->field($id_field);
$id = $f->subfield('a') if ($f);
$source = 's';
}
return undef;
}
+ $rec->delete_field($_) for ($rec->field($id_field, @trash_fields));
+
$id =~ s/\s*$//o;
$id =~ s/^\s*//o;
$id =~ s/(\S+)$/$1/o;
}
my $tcn = MARC::Field->new(
- '039',
+ $id_field,
'', '',
'a', $id,
'b', do { $source_map{$source} || 'System' },
);
- $rec->delete_field($_) for ($rec->field('035','948','998'));
$rec->append_fields($tcn);
return $rec;
return $authtoken;
}
+# entityize($stuff, $form)
+#
+# Normalizes a character string and replaces every non-ASCII character
+# with a hexadecimal XML numeric character reference (e.g. "&#xE9;").
+#
+#   $stuff - the text to convert
+#   $form  - optional; 'D' selects NFD (decomposed) normalization,
+#            anything else (or nothing) selects NFC (composed)
+#
+# Returns the converted string.
+sub entityize {
+    my $stuff = shift;
+    my $form = shift;
+
+    if ($form and $form eq 'D') {
+        $stuff = NFD($stuff);
+    } else {
+        $stuff = NFC($stuff);
+    }
+
+    # Cover the supplementary planes as well as the BMP, so characters above
+    # U+FFFF are not left raw in the output.  U+FFFE and U+FFFF stay
+    # excluded: they are noncharacters that XML does not permit even as
+    # character references.
+    $stuff =~ s/([\x{0080}-\x{fffd}\x{10000}-\x{10ffff}])/sprintf('&#x%X;',ord($1))/sge;
+    return $stuff;
+}
+
+
--- /dev/null
+#!/usr/bin/perl
+# Reads Fieldmapper objects serialized as JSON (one object per input line,
+# from the files named on the command line or STDIN) and writes them out as
+# PostgreSQL "COPY ... FROM STDIN" sections, one section per class hint.
+#
+# Options:
+#   --config <file>       OpenSRF bootstrap config
+#                         (default /openils/conf/bootstrap.conf)
+#   --output <file>       file to write (default: STDOUT)
+#   --autoprimary <hint>  class hint whose identity column is left out of
+#                         the column list; may be repeated
+#   --order <hint>        class hint to emit; sections are written in the
+#                         order given; may be repeated
+use strict;
+use warnings;
+
+use lib '/openils/lib/perl5/';
+
+use OpenSRF::System;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Utils::Fieldmapper;
+use JSON;
+use FileHandle;
+
+use Time::HiRes qw/time/;
+use Getopt::Long;
+
+my @files;
+my ($config, $output, @auto, @order) =
+    ('/openils/conf/bootstrap.conf');
+
+GetOptions(
+    'config=s'      => \$config,
+    'output=s'      => \$output,
+    'autoprimary=s' => \@auto,
+    'order=s'       => \@order,
+);
+
+# Rows collected per class hint, and the table/column list for each hint.
+my %lineset;
+my %fieldcache;
+
+OpenSRF::System->bootstrap_client( config_file => $config );
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+my $count = 0;
+my $starttime = time;
+while ( my $rec = <> ) {
+    next unless ($rec);
+
+    my $row;
+    try {
+        $row = JSON->JSON2perl($rec);
+    } catch Error with {
+        my $e = shift;
+        warn "\n\n !!! Error : $e \n\n at or around line $count\n";
+    };
+    die "Unable to parse input at or around line $count\n" unless ($row);
+
+    my $class = $row->class_name;
+    my $hint = $row->json_hint;
+
+    # First row of this class: work out the table and column list once.
+    if (!$lineset{$hint}) {
+        $lineset{$hint} = [];
+        my @cols = $row->real_fields;
+        if (grep { $_ eq $hint} @auto) {
+            @cols = grep { $_ ne $class->Identity } @cols;
+        }
+
+        $fieldcache{$hint} =
+            { table  => $class->Table,
+              fields => \@cols,
+            };
+    }
+
+    push @{ $lineset{$hint} }, [map { $row->$_ } @{ $fieldcache{$hint}{fields} }];
+
+    if (!($count % 500)) {
+        # Guard the rate calculation: no time may have elapsed yet.
+        my $elapsed = time - $starttime;
+        print STDERR "\r$count\t". ($elapsed > 0 ? $count / $elapsed : 0);
+    }
+
+    $count++;
+}
+
+print STDERR "\nWriting file ...\n";
+
+# Open the named file with an explicit mode so that characters in the path
+# are never interpreted as open() mode syntax; fall back to a dup of STDOUT.
+my $out = $output
+    ? FileHandle->new($output, '>')
+    : FileHandle->new('>&STDOUT');
+die "Unable to open output " . ($output ? "'$output'" : 'STDOUT') . ": $!\n"
+    unless ($out);
+
+binmode($out, ':utf8');
+
+$out->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n");
+
+for my $h (@order) {
+    my $cache = $fieldcache{$h};
+
+    # A requested class that never appeared in the input has no table or
+    # column information; skip it instead of dying on an undefined reference.
+    if (!$cache) {
+        warn "No rows of class '$h' were read; skipping\n";
+        next;
+    }
+
+    my $fields = join(',', @{ $cache->{fields} });
+    $out->print( "COPY $cache->{table} ($fields) FROM STDIN;\n" );
+
+    for my $line (@{ $lineset{$h} }) {
+        $out->print( join("\t", map { copy_escape($_) } @$line)."\n" );
+    }
+
+    $out->print('\.'."\n\n");
+}
+
+$out->close or die "Error closing output: $!\n";
+
+# copy_escape($value)
+#
+# Returns $value encoded for the PostgreSQL COPY text format: undef becomes
+# \N, and backslash, tab, newline and carriage return are backslash-escaped.
+sub copy_escape {
+    my $value = shift;
+    return '\N' unless (defined $value);
+
+    # Backslashes must be doubled first; otherwise the backslashes added by
+    # the later substitutions would themselves be doubled.
+    $value =~ s/\\/\\\\/g;
+    $value =~ s/\t/\\t/g;
+    $value =~ s/\n/\\n/g;
+    $value =~ s/\r/\\r/g;
+    return $value;
+}