From 6decaf08d6631e85a70390024d9ee5a0dda674b3 Mon Sep 17 00:00:00 2001 From: Dan Scott Date: Wed, 8 May 2013 10:06:37 -0400 Subject: [PATCH] Preserve Conifer migration scripts These might be of historical interest to other sites migrating to Evergreen, or to someone who tries to figure out how Conifer came to be what it is today. Signed-off-by: Dan Scott --- tools/migration-scripts/circ_rules.sql | 29 + tools/migration-scripts/config_z3950.sql | 210 ++++ tools/migration-scripts/create_test_users.pl | 68 ++ tools/migration-scripts/demousers.sql | 1124 ++++++++++++++++++++ tools/migration-scripts/fixURIs.pl | 110 ++ tools/migration-scripts/fix_bad_marcxml.pl | 65 ++ tools/migration-scripts/fix_windsors_diacritics.pl | 166 +++ tools/migration-scripts/generate_copies.sql | 200 ++++ tools/migration-scripts/lul_import.pl | 94 ++ tools/migration-scripts/org_units.sql | 79 ++ tools/migration-scripts/patron_groups.sql | 58 + tools/migration-scripts/sip_user.sql | 29 + tools/migration-scripts/windsor_import.pl | 94 ++ .../migration-scripts/windsor_patron_load_base.csv | 1 + tools/migration-scripts/windsor_patrons.sql | 260 +++++ 15 files changed, 2587 insertions(+) create mode 100644 tools/migration-scripts/circ_rules.sql create mode 100644 tools/migration-scripts/config_z3950.sql create mode 100644 tools/migration-scripts/create_test_users.pl create mode 100644 tools/migration-scripts/demousers.sql create mode 100644 tools/migration-scripts/fixURIs.pl create mode 100644 tools/migration-scripts/fix_bad_marcxml.pl create mode 100644 tools/migration-scripts/fix_windsors_diacritics.pl create mode 100644 tools/migration-scripts/generate_copies.sql create mode 100644 tools/migration-scripts/lul_import.pl create mode 100644 tools/migration-scripts/org_units.sql create mode 100644 tools/migration-scripts/patron_groups.sql create mode 100644 tools/migration-scripts/sip_user.sql create mode 100644 tools/migration-scripts/windsor_import.pl create mode 100644 tools/migration-scripts/windsor_patron_load_base.csv create mode 100644 tools/migration-scripts/windsor_patrons.sql diff --git a/tools/migration-scripts/circ_rules.sql b/tools/migration-scripts/circ_rules.sql new file mode 100644 index 0000000000..9e3b9c8431 --- /dev/null +++ b/tools/migration-scripts/circ_rules.sql @@ -0,0 +1,29 @@ +INSERT INTO config.rule_circ_duration (name, extended, normal, shrt, max_renewals) VALUES + ('120_days_2_renew', '120 days', '120 days', '120 days', 2), -- OSUL rules + ('3_weeks_2_renew', '3 weeks', '3 weeks', '3 weeks', 2), + ('14_days_3_renew', '14 days', '14 days', '14 days', 3), -- OWA rules + ('28_days_3_renew', '28 days', '28 days', '28 days', 3), + ('7_days_1_renew', '7 days', '7 days', '7 days', 1), + ('7_days_3_renew', '7 days', '7 days', '7 days', 3), + ('120_minutes_0_renew', '120 minutes', '120 minutes', '120 minutes', 0), + + ('28_days_5_renew', '28 days', '28 days', '28 days', 5), -- OWAL rules + ('14_days_5_renew', '14 days', '14 days', '14 days', 5), + ('2_days_20_renew', '2 days', '2 days', '2 days', 20), + ('3_days_20_renew', '3 days', '3 days', '3 days', 20), + ('1_day_1_renew', '1 day', '1 day', '1 day', 1), + ('16_hours_20_renew', '16 hours', '16 hours', '16 hours', 20), + ('3_hours_20_renew', '3 hours', '3 hours', '3 hours', 20), + ('5_hours_20_renew', '5 hours', '5 hours', '5 hours', 20), + ('non_circ', '0 days', '0 days', '0 days', 0); -- obviously a sign that permit_patron or permit_copy should be invoked instead, but oh well for now + +INSERT INTO config.rule_max_fine (name, amount) VALUES + 
('10_dollars', 10.00), + ('50_dollars', 50.00); + +INSERT INTO config.rule_recuring_fine (name, high, normal, low, recurance_interval) VALUES + ('free', 0.00, 0.00, 0.00, '1 day'), + ('5_cents_per_minute', 00.05, 00.05, 00.05, '1 minute'), + ('60_cents_per_day', 00.60, 00.60, 00.60, '1 day'), + ('2_dollars_per_day', 2.00, 2.00, 2.00, '1 day'), + ('10_dollars_per_day', 10.00, 10.00, 10.00, '1 day'); diff --git a/tools/migration-scripts/config_z3950.sql b/tools/migration-scripts/config_z3950.sql new file mode 100644 index 0000000000..ab1623f235 --- /dev/null +++ b/tools/migration-scripts/config_z3950.sql @@ -0,0 +1,210 @@ +-- +-- PostgreSQL database dump +-- + +SET client_encoding = 'SQL_ASCII'; +SET standard_conforming_strings = off; +SET check_function_bodies = false; +SET client_min_messages = warning; +SET escape_string_warning = off; + +SET search_path = config, pg_catalog; + +-- +-- Name: z3950_attr_id_seq; Type: SEQUENCE SET; Schema: config; Owner: evergreen +-- + +SELECT pg_catalog.setval('z3950_attr_id_seq', 287, true); + + +-- +-- Data for Name: z3950_attr; Type: TABLE DATA; Schema: config; Owner: evergreen +-- + +COPY z3950_attr (id, source, name, label, code, format, truncation) FROM stdin; +110 McGill tcn Title Control Number 12 1 0 +111 McGill isbn ISBN 7 6 0 +112 McGill lccn LCCN 9 1 0 +113 McGill author Author 1003 6 0 +114 McGill title Title 4 6 0 +115 McGill issn ISSN 8 1 0 +116 McGill publisher Publisher 1018 6 0 +117 McGill pubdate Publication Date 31 1 0 +118 McGill item_type Item Type 1001 1 0 +119 UChicago tcn Title Control Number 12 1 0 +120 UChicago isbn ISBN 7 6 0 +121 UChicago lccn LCCN 9 1 0 +122 UChicago author Author 1003 6 0 +123 UChicago title Title 4 6 0 +124 UChicago issn ISSN 8 1 0 +125 UChicago publisher Publisher 1018 6 0 +126 UChicago pubdate Publication Date 31 1 0 +127 UChicago item_type Item Type 1001 1 0 +128 UMontréal tcn Title Control Number 12 1 0 +129 UMontréal isbn ISBN 7 6 0 +130 UMontréal lccn LCCN 9 1 0 +131 UMontréal author Author 1003 6 0 +132 UMontréal title Title 4 6 0 +133 UMontréal issn ISSN 8 1 0 +134 UMontréal publisher Publisher 1018 6 0 +135 UMontréal pubdate Publication Date 31 1 0 +136 UMontréal item_type Item Type 1001 1 0 +137 ULaval tcn Title Control Number 12 1 0 +138 ULaval isbn ISBN 7 6 0 +139 ULaval lccn LCCN 9 1 0 +140 ULaval author Author 1003 6 0 +141 ULaval title Title 4 6 0 +142 ULaval issn ISSN 8 1 0 +143 ULaval publisher Publisher 1018 6 0 +144 ULaval pubdate Publication Date 31 1 0 +145 ULaval item_type Item Type 1001 1 0 +146 UofT tcn Title Control Number 12 1 0 +147 UofT isbn ISBN 7 6 0 +148 UofT lccn LCCN 9 1 0 +149 UofT author Author 1003 6 0 +150 UofT title Title 4 6 0 +151 UofT issn ISSN 8 1 0 +152 UofT publisher Publisher 1018 6 0 +153 UofT pubdate Publication Date 31 1 0 +154 UofT item_type Item Type 1001 1 0 +174 Coutts isbn ISBN 7 6 0 +191 BAnQ tcn Title Control Number 12 1 0 +192 BAnQ isbn ISBN 7 6 0 +193 BAnQ lccn LCCN 9 1 0 +194 BAnQ author Author 1003 6 0 +195 BAnQ title Title 4 6 0 +196 BAnQ issn ISSN 8 1 0 +197 BAnQ publisher Publisher 1018 6 0 +198 BAnQ pubdate Publication Date 31 1 0 +199 BAnQ item_type Item Type 1001 1 0 +101 UWO tcn Title Control Number 12 1 0 +102 UWO author Author 1003 1 0 +103 UWO isbn ISBN 7 1 0 +104 UWO title Title 4 1 0 +105 UWO lccn LCCN 9 1 0 +106 UWO issn ISSN 8 1 0 +164 AMICUS tcn Title Control Number 12 1 1 +165 AMICUS isbn ISBN 7 6 1 +166 AMICUS lccn LCCN 9 1 1 +167 AMICUS author Author 1003 6 1 +168 AMICUS title Title 4 6 1 +169 AMICUS issn ISSN 8 1 1 +170 AMICUS 
publisher Publisher 1018 6 1 +171 AMICUS pubdate Publication Date 31 1 1 +172 AMICUS item_type Item Type 1001 1 1 +182 NLM tcn Title Control Number 12 1 1 +183 NLM isbn ISBN 7 6 1 +184 NLM lccn LCCN 9 1 1 +185 NLM author Author 1003 6 1 +186 NLM title Title 4 6 1 +187 NLM issn ISSN 8 1 1 +188 NLM publisher Publisher 1018 6 1 +189 NLM pubdate Publication Date 31 1 1 +190 NLM item_type Item Type 1001 1 1 +200 uottawa tcn Title Control Number 12 1 1 +201 uottawa isbn ISBN 7 1 1 +202 uottawa lccn LCCN 9 1 1 +203 uottawa author Author 1003 1 1 +204 uottawa title Title 4 1 1 +205 uottawa issn ISSN 8 1 1 +208 uottawa item_type Item Type 1001 1 1 +218 ualberta tcn Title Control Number 12 1 1 +219 ualberta isbn ISBN 7 6 1 +220 ualberta lccn LCCN 9 1 1 +221 ualberta author Author 1003 6 1 +222 ualberta title Title 4 6 1 +223 ualberta issn ISSN 8 1 1 +224 ualberta publisher Publisher 1018 6 1 +225 ualberta pubdate Publication Date 31 1 1 +226 ualberta item_type Item Type 1001 1 1 +227 yorku tcn Title Control Number 12 1 1 +228 yorku isbn ISBN 7 6 1 +229 yorku lccn LCCN 9 1 1 +230 yorku author Author 1003 6 1 +231 yorku title Title 4 6 1 +232 yorku issn ISSN 8 1 1 +234 yorku pubdate Publication Date 31 1 1 +236 umanitoba tcn Title Control Number 12 1 1 +237 umanitoba isbn ISBN 7 6 1 +238 umanitoba lccn LCCN 9 1 1 +239 umanitoba author Author 1003 6 1 +240 umanitoba title Title 4 6 1 +241 umanitoba issn ISSN 8 1 1 +242 umanitoba publisher Publisher 1018 6 1 +243 umanitoba pubdate Publication Date 31 1 1 +244 umanitoba item_type Item Type 1001 1 1 +245 umich tcn Title Control Number 12 1 1 +246 umich isbn ISBN 7 6 1 +247 umich lccn LCCN 9 1 1 +248 umich author Author 1003 6 1 +249 umich title Title 4 6 1 +250 umich issn ISSN 8 1 1 +252 umich pubdate Publication Date 31 1 1 +254 TUG tcn Title Control Number 12 1 1 +255 TUG isbn ISBN 7 6 1 +256 TUG lccn LCCN 9 1 1 +257 TUG author Author 1003 6 1 +258 TUG title Title 4 6 1 +259 TUG issn ISSN 8 1 1 +260 TUG publisher Publisher 1018 6 1 +261 TUG pubdate Publication Date 31 1 1 +262 TUG item_type Item Type 1001 1 1 +263 CarletonU tcn Title Control Number 12 1 1 +264 CarletonU isbn ISBN 7 1 1 +265 CarletonU lccn LCCN 9 1 1 +266 CarletonU author Author 1003 1 1 +267 CarletonU title Title 4 1 1 +268 CarletonU issn ISSN 8 1 1 +269 CarletonU item_type Item Type 1001 1 1 +270 UBC tcn Title Control Number 12 1 1 +271 UBC isbn ISBN 7 6 1 +272 UBC lccn LCCN 9 1 1 +273 UBC author Author 1003 6 1 +274 UBC title Title 4 6 1 +275 UBC issn ISSN 8 1 1 +276 UBC publisher Publisher 1018 6 1 +277 UBC pubdate Publication Date 31 1 1 +278 UBC item_type Item Type 1001 1 1 +279 IndianaU tcn Title Control Number 12 1 1 +280 IndianaU isbn ISBN 7 6 1 +281 IndianaU lccn LCCN 9 1 1 +282 IndianaU author Author 1003 6 1 +283 IndianaU title Title 4 6 1 +284 IndianaU issn ISSN 8 1 1 +285 IndianaU publisher Publisher 1018 6 1 +286 IndianaU pubdate Publication Date 31 1 1 +287 IndianaU item_type Item Type 1001 1 1 +\. 
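+
+-- Optional sanity check, added for illustration (not part of the original
+-- dump): count how many Bib-1 use attributes each source exposes. Most
+-- targets carry all nine (4 title, 7 ISBN, 8 ISSN, 9 LCCN, 12 TCN,
+-- 31 pubdate, 1001 item type, 1003 author, 1018 publisher); a few, such as
+-- Coutts (ISBN only), are deliberately sparser.
+--
+--   SELECT source, COUNT(*) AS attrs
+--     FROM config.z3950_attr
+--     GROUP BY source
+--     ORDER BY attrs, source;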
+
+
+--
+-- Data for Name: z3950_source; Type: TABLE DATA; Schema: config; Owner: evergreen
+--
+
+COPY z3950_source (name, label, host, port, db, record_format, transmission_format, auth) FROM stdin;
+UofT University of Toronto sirsi.library.utoronto.ca 2200 UNICORN F usmarc f
+UMontréal Université de Montréal atrium.bib.umontreal.ca 210 ADVANCE FI usmarc f
+McGill McGill University aleph.mcgill.ca 210 MUSE FI usmarc f
+UChicago University of Chicago ipac.lib.uchicago.edu 210 uofc FI usmarc f
+NLM NLM tegument.nlm.nih.gov 7090 VOYAGER FI usmarc f
+BAnQ Bibliothèque et Archives nationales du Québec www.biblinat.gouv.qc.ca 210 IRIS FI usmarc f
+Coutts Coutts z3950.couttsinfo.com 210 USMARC F usmarc t
+ULaval Université Laval ariane2.ulaval.ca 2200 unicorn FI usmarc f
+UWO University of Western Ontario alpha.lib.uwo.ca 210 INNOPAC FI usmarc f
+AMICUS AMICUS amicus.nlc-bnc.ca 210 NL F usmarc t
+uottawa University of Ottawa orbis.uottawa.ca 210 INNOPAC F usmarc f
+ualberta University of Alberta ualapp.library.ualberta.ca 2200 unicorn F usmarc f
+yorku York University theta.library.yorku.ca 2200 unicorn F usmarc f
+umanitoba University of Manitoba lrpapp.cc.umanitoba.ca 2200 unicorn B usmarc f
+umich University of Michigan z3950.lib.umich.edu 210 miu01_pub F usmarc f
+TUG Tri-University Group 129.97.129.194 7090 voyager FI usmarc f
+CarletonU Carleton University catalogue.library.carleton.ca 210 INNOPAC FI usmarc f
+UBC University of British Columbia portage.library.ubc.ca 7090 voyager FI usmarc f
+IndianaU Indiana State University luis.indstate.edu 7090 voyager FI usmarc f
+\.
+
+
+--
+-- PostgreSQL database dump complete
+--
+
diff --git a/tools/migration-scripts/create_test_users.pl b/tools/migration-scripts/create_test_users.pl
new file mode 100644
index 0000000000..81fce98afe
--- /dev/null
+++ b/tools/migration-scripts/create_test_users.pl
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+=head1 Generate a set of staff and users for testing Evergreen
+
+=over
+
+=item get_org_unit()
+
+Map user IDs to org_units for permissions
+
+=back
+
+=cut
+
+sub get_org_unit {
+    my $id = shift;
+    if ($id <= 10) {
+        return 105; # OSUL
+    } elsif ($id <= 20) {
+        return 106; # WINDSYS
+    } elsif ($id <= 30) {
+        return 111; # ALGOMASYS
+    } elsif ($id <= 40) {
+        return 125; # NOHIN
+    }
+}
+
+my $password = 'demo123';
+
+my @profiles = ( 1, 4, 5, 10 );
+
+my $profile_info = {
+    '1' => 'user',
+    '4' => 'cat',
+    '5' => 'circ',
+    '10' => 'admin',
+};
+
+my $sql = "BEGIN;\n";
+
+foreach my $i (1..40) {
+    foreach my $profile (@profiles) {
+        my $twodigit = sprintf('%02d', $i);
+        my $barcode = '1' . ($profile == 10 ? '0' : $profile) . $twodigit;
+        my $usrname = $profile_info->{$profile . ""} . $i;
+        my $family_name = ucfirst($usrname);
+        my $org_unit = get_org_unit($i);
+        $sql .= <
[... the heredoc body, the rest of create_test_users.pl, the demousers.sql diff, and the opening of fixURIs.pl were lost in extraction; the import lines below are reconstructed ...]
+use MARC::File::XML ( BinaryEncoding => 'utf8', RecordFormat => 'USMARC' );
+use MARC::Batch;
+
+# Clean up URIs from MARCXML records prior to batch ingest
+# * If we detect a proxy URL:
+# * Ensure ind1 = 4
+# * Ensure ind2 = 0 or 1
+# * Ensure $9 = aou.shortname
+# * Trim whitespace and other tweaks while we're at it?
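+#
+# As a concrete illustration (hypothetical field, not drawn from the data),
+# a proxied Windsor URL with a stray second indicator comes out as:
+#
+#   before: 856 42 $u http://ezproxy.uwindsor.ca/login?url=http://example.com/
+#   after:  856 41 $u http://ezproxy.uwindsor.ca/login?url=http://example.com/ $9 OWA
+#
+# ind1 is already 4 and is left alone, the unexpected ind2 is forced to 1,
+# and $9 gains the org_unit shortname that scopes the URI to OWA.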
+ +my ($input_file, $output_file); +GetOptions( + 'input=s' => \$input_file, + 'output=s' => \$output_file +); + +if (!$input_file or !$output_file) { + print("Please specify the following options:\n"); + print("\t--input : input file of MARCXML records\n"); + print("\t--output : output file of processed MARCXML records\n"); + exit(); +} + +my $input = new MARC::Batch( 'XML', $input_file ); +$input->strict_off(); +my $output = MARC::File::XML->out( $output_file ); + +my ($touched, $url_cnt, $ind1_cnt, $ind2_cnt, $sub9_cnt) = (0, 0, 0, 0, 0); +my $marc; +while ( try { $marc = $input->next() } otherwise { $marc = -1 } ) { + # Skip the record if we couldn't even decode it + next if ($marc == -1); + + my $edited = 0; + my @uri_fields = $marc->field('856'); + foreach my $uri (@uri_fields) { + my ($orgunit); + + # There's no way we should have multiples, but let's iterate anyway + my @urls = $uri->subfield('u'); + + foreach my $url (@urls) { + # For general use we should factor these out to a hash. Oh well. + + # We're filtering by proxy address, because theoretically anything + # that is not proxied is open to the world to access and doesn't + # need to be treated as a URI particular to that org_unit + if ($url =~ m/librweb.laurentian.ca/o) { + $orgunit = 'OSUL'; + } elsif ($url =~ m/libproxy.auc.ca/o) { + $orgunit = 'OSTMA'; + } elsif ($url =~ m/normedproxy.lakeheadu.ca/o) { + $orgunit = 'OSM'; + } elsif ($url =~ m/ezproxy.uwindsor.ca/o or $url =~ m/webvoy.uwindsor.ca/o ) { + $orgunit = 'OWA'; + } + + if ($orgunit) { + my $clean_url = $url; + $clean_url =~ s/^\s*(.*?)\s*$/$1/o; + if ($url ne $clean_url) { + $uri->update(u => $clean_url); + $edited++; + $url_cnt++; + } + + my $ind1 = $uri->indicator(1); + if ($ind1 and $ind1 ne '1' and $ind1 ne '4') { + $uri->update(ind1 => '4'); + $edited++; + $ind1_cnt++; + } + + my $ind2 = $uri->indicator(2); + if ($ind2 and $ind2 ne '0' and $ind2 ne '1') { + $uri->update(ind2 => '1'); + $edited++; + $ind2_cnt++; + } + + # Risking that we only have one subfield 9 here + # Should be a slight risk as it's not defined in the spec + my $aou = $uri->subfield('9'); + if (!$aou or $aou ne $orgunit) { + $uri->update(9 => $orgunit); + $edited++; + $sub9_cnt++; + } + } + } + } + if ($edited) { + $touched++; + } + $output->write($marc); +} +$output->close(); +print "Touched $touched records to fix URIs.\n"; +print "\t$url_cnt URLs were touched\n"; +print "\t$ind1_cnt indicator 1 values were touched\n"; +print "\t$ind2_cnt indicator 2 values were touched\n"; +print "\t$sub9_cnt subfield '9' values were touched\n"; + +# vim: et:ts=4:sw=4: diff --git a/tools/migration-scripts/fix_bad_marcxml.pl b/tools/migration-scripts/fix_bad_marcxml.pl new file mode 100644 index 0000000000..2e45dbd3c5 --- /dev/null +++ b/tools/migration-scripts/fix_bad_marcxml.pl @@ -0,0 +1,65 @@ +#!/usr/bin/perl +use strict; +use warnings; + +foreach my $file (@ARGV) { + process_file($file); +} + +sub process_file { + my $file = shift; + + # Empty datafields anger MARC::File::XML + open(FH, '<', $file) or die $!; + open(CLEAN, '>', "$file.new"); + + my ($trim, $lastline, $lineno) = (0, '', 1); + while () { + if ($_ =~ m## and $lastline =~ m# + # In subtitle "sports" appears as " + # ort + # . 
+ # + # + # This will at least enable MARC::File::XML to process it: + if ($_ =~ m##o or $_ =~ m##o ) { + print STDERR "Bad subfield code \" at line $lineno of file $file\n"; + $_ =~ s{}{}o; + $_ =~ s{}{}o; + } elsif ($_ =~ m##o or $_ =~ m##o) { + print STDERR "Bad subfield code < at line $lineno of file $file\n"; + $_ =~ s{}{}o; + $_ =~ s{}{}o; + } elsif ($_ =~ m##o or $_ =~ m##o) { + print STDERR "Bad subfield code & at line $lineno of file $file\n"; + $_ =~ s{}{}o; + $_ =~ s{}{}o; + } elsif ($_ =~ m# 'utf-8' ); +use MARC::Charset; +use DBI; + +my ($marcfile, $marctype, $enc, $config, $username, $password) = ('/openils/migration/windsor/bib20090430.mrc', 'USMARC', 'UTF8', '/openils/conf/opensrf_core.xml'); + +GetOptions( + 'encoding=s' => \$enc, # set assumed MARC encoding for MARC::Charset + 'config=s' => \$config, # location of OpenSRF core config file, defaults to /openils/conf/opensrf_core.xml + "username=s" => \$username, # EG username + "password=s" => \$password, # EG password +); + +if ($enc) { + MARC::Charset->ignore_errors(1); + MARC::Charset->assume_encoding($enc); +} + +OpenSRF::System->bootstrap_client( config_file => $config ); + +# Login to Evergreen and get an authentication token +my $auth = oils_login($username, $password); +if (!$auth) { + die "Could not retrieve an authentication token"; +} + +select STDERR; $| = 1; +select STDOUT; $| = 1; +binmode STDOUT, ":utf8"; + +my $batch = new MARC::Batch ( $marctype, $marcfile ); +$batch->strict_off(); +$batch->warnings_off(); + +my $starttime = time; +my $rec; +my $count = 0; +my $rec_count = 0; +PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) { + next if ($rec == -1); + + $count++; + + if ($rec->as_formatted =~ m/[^\x00-\x7f]/) { + $rec_count++; + print "$rec_count of $count\n"; + update_id_field(\$rec); + fix_URIs(\$rec); + update_marc(\$rec); + + # Exit nice and early so that we don't wander off and update a whole batch without testing + if ($rec_count > 0) { + exit; + } + } + +} + +# Set the 001 and 901 to our record ID in Conifer +# Windsor records are offset by 1 million from their legacy ID +sub update_id_field { + my $rec = shift; + + my $tcn = $$rec->field('001'); + my $rec_id = $tcn->data + 1000000; + $tcn->update($rec_id); + my $id_field = MARC::Field->new('901', '', '', 'a' => $rec_id, 'b' => 'Unknown', 'c' => $rec_id); + $$rec->append_fields($id_field); +} + +sub fix_URIs { + my $marc = shift; + + my @uri_fields = $$marc->field('856'); + foreach my $uri (@uri_fields) { + my ($orgunit); + + # There's no way we should have multiples, but let's iterate anyway + my @urls = $uri->subfield('u'); + + foreach my $url (@urls) { + # For general use we should factor these out to a hash. Oh well. 
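+            # A sketch of that hash-driven version, for the record (untested,
+            # not part of the original script):
+            #
+            #     my %proxy_to_org = (
+            #         'librweb.laurentian.ca'    => 'OSUL',
+            #         'libproxy.auc.ca'          => 'OSTMA',
+            #         'normedproxy.lakeheadu.ca' => 'OSM',
+            #         'ezproxy.uwindsor.ca'      => 'OWA',
+            #         'webvoy.uwindsor.ca'       => 'OWA',
+            #     );
+            #     ($orgunit) = map { $proxy_to_org{$_} }
+            #                  grep { $url =~ m/\Q$_\E/ } keys %proxy_to_org;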
+
+            # We're filtering by proxy address, because theoretically anything
+            # that is not proxied is open to the world to access and doesn't
+            # need to be treated as a URI particular to that org_unit
+            if ($url =~ m/librweb.laurentian.ca/o) {
+                $orgunit = 'OSUL';
+            } elsif ($url =~ m/libproxy.auc.ca/o) {
+                $orgunit = 'OSTMA';
+            } elsif ($url =~ m/normedproxy.lakeheadu.ca/o) {
+                $orgunit = 'OSM';
+            } elsif ($url =~ m/ezproxy.uwindsor.ca/o or $url =~ m/webvoy.uwindsor.ca/o ) {
+                $orgunit = 'OWA';
+            }
+
+            if ($orgunit) {
+                my $clean_url = $url;
+                $clean_url =~ s/^\s*(.*?)\s*$/$1/o;
+                if ($url ne $clean_url) {
+                    $uri->update(u => $clean_url);
+                }
+
+                my $ind1 = $uri->indicator(1);
+                if ($ind1 and $ind1 ne '1' and $ind1 ne '4') {
+                    $uri->update(ind1 => '4');
+                }
+
+                my $ind2 = $uri->indicator(2);
+                if ($ind2 and $ind2 ne '0' and $ind2 ne '1') {
+                    $uri->update(ind2 => '1');
+                }
+
+                # Risking that we only have one subfield 9 here
+                # Should be a slight risk as it's not defined in the spec
+                my $aou = $uri->subfield('9');
+                if (!$aou or $aou ne $orgunit) {
+                    $uri->update(9 => $orgunit);
+                }
+            }
+        }
+    }
+}
+
+sub update_marc {
+    my $rec = shift;
+
+    # Borrowed from marc2bre.pl to get clean XML
+    (my $xml = $$rec->as_xml_record()) =~ s/\n//sog;
+    $xml =~ s/^<\?xml.+\?\s*>//go;
+    $xml =~ s/>\s+</></go;
+    $xml =~ s/\p{Cc}//go;
+    $xml = OpenILS::Application::AppUtils->entityize($xml);
+    $xml =~ s/[\x00-\x1f]//go;
+
+    # Update and ingest this puppy
+    my $update = OpenILS::Application::AppUtils->simplereq('open-ils.cat',
+        'open-ils.cat.biblio.record.xml.update',
+        ($auth, int($$rec->field('001')->data), $xml)
+    );
+
+    # Return the cleaned-up XML in case we want to inspect it
+    return $xml;
+}
+
+
diff --git a/tools/migration-scripts/generate_copies.sql b/tools/migration-scripts/generate_copies.sql
new file mode 100644
index 0000000000..7dadd9ac15
--- /dev/null
+++ b/tools/migration-scripts/generate_copies.sql
@@ -0,0 +1,200 @@
+BEGIN;
+SET search_path TO scratchpad, public;
+
+-- Map libraries
+UPDATE staging_items
+    SET owning_lib = 'OSUL'
+    WHERE owning_lib = 'DESMARAIS';
+
+UPDATE staging_items
+    SET owning_lib = 'OSTMA'
+    WHERE owning_lib = 'ALGOMA';
+
+UPDATE staging_items
+    SET owning_lib = 'CRC'
+    WHERE location = 'ELSE-CURR';
+
+UPDATE staging_items
+    SET owning_lib = 'VALE'
+    WHERE location = 'INCO';
+
+UPDATE staging_items
+    SET owning_lib = 'LDRC'
+    WHERE location = 'EDUCATION';
+
+UPDATE staging_items
+    SET owning_lib = 'OSM', location = 'WWW'
+    WHERE location = 'HIRC-WWW';
+
+UPDATE staging_items
+    SET owning_lib = 'MRC'
+    WHERE location = 'DESM-MRC';
+
+UPDATE staging_items
+    SET owning_lib = 'NOSME'
+    WHERE location IN ('HIRCE-AV', 'HIRCE-BOOK', 'HIRCE-CIRC', 'HIRCE-PER', 'HIRCE-REF');
+
+UPDATE staging_items
+    SET owning_lib = 'NOSMW'
+    WHERE location IN ('HIRCW-AV', 'HIRCW-BOOK', 'HIRCW-CIRC', 'HIRCW-PER', 'HIRCW-REF');
+
+UPDATE staging_items
+    SET owning_lib = 'OSM'
+    WHERE owning_lib = 'HIRC';
+
+UPDATE staging_items
+    SET owning_lib = 'OWA'
+    WHERE owning_lib = 'Leddy';
+
+UPDATE staging_items
+    SET owning_lib = 'OWAL'
+    WHERE owning_lib = 'WINLAW';
+
+UPDATE staging_items
+    SET owning_lib = 'KAP'
+    WHERE location IN (SELECT location FROM staging_items WHERE location LIKE 'HRSTK-%');
+
+UPDATE staging_items
+    SET owning_lib = 'TIMMINS'
+    WHERE location IN (SELECT location FROM staging_items WHERE location LIKE 'HRSTT-%');
+
+UPDATE staging_items
+    SET owning_lib = 'SAH'
+    WHERE location = 'NEORCC';
+
+UPDATE staging_items
+    SET owning_lib = 'XSTRATA'
+    WHERE location = 'FALCON';
+
+-- Map item types
+UPDATE staging_items
+    SET item_type = 'BOOK'
+    WHERE
item_type IN ('BOOKS'); + +UPDATE staging_items + SET item_type = 'MICROFORM' + WHERE item_type = 'MICROFORMS'; + +UPDATE staging_items + SET item_type = 'NEWSPAPER' + WHERE item_type = 'NEWSPAPERS'; + +-- Map locations +UPDATE staging_items + SET location = 'AV' + WHERE location IN ('HIRCE-AV', 'HIRCW-AV'); + +UPDATE staging_items + SET location = 'BOOK' + WHERE location IN ('HIRCE-BOOK', 'HIRCW-BOOK'); + +UPDATE staging_items + SET location = 'CIRC' + WHERE location IN ('HIRCE-CIRC', 'HIRCW-CIRC'); + +UPDATE staging_items + SET location = 'PER' + WHERE location IN ('HIRCE-PER', 'HIRCW-PER'); + +UPDATE staging_items + SET location = 'REF' + WHERE location IN ('HIRCE-REF', 'HIRCW-REF'); + +UPDATE staging_items + SET location = 'DOC' + WHERE location IN ('HRST-DOC', 'HRSTK-DOC', 'HRSTT-DOC'); + +UPDATE staging_items + SET location = 'EBOOK' + WHERE location IN ('HRST-EBOOK', 'HRSTK-EBOOK', 'HRSTT-EBOOK'); + +UPDATE staging_items + SET location = 'PER' + WHERE location IN ('HRST-PER', 'HRSTK-PER', 'HRSTT-PER'); + +UPDATE staging_items + SET location = 'PRET' + WHERE location IN ('HRST-PRET', 'HRSTK-PRET', 'HRSTT-PRET'); + +UPDATE staging_items + SET location = 'REF' + WHERE location IN ('HRST-REF', 'HRSTK-REF', 'HRSTT-REF'); + +UPDATE staging_items + SET location = 'VID' + WHERE location IN ('HRST-VID', 'HRSTK-VID', 'HRSTT-VID'); + +-- First, we build shelving location +INSERT INTO asset.copy_location (name, owning_lib) + SELECT DISTINCT l.location, ou.id + FROM staging_items l JOIN actor.org_unit ou + ON (l.owning_lib = ou.shortname) +; + +-- Create circ modifiers for in-db circulation +-- This is very, very crude but satisfies the FK constraints +INSERT INTO config.circ_modifier (code, name, description, sip2_media_type, magnetic_media) + SELECT DISTINCT item_type as code, + item_type AS name, + LOWER(item_type) AS description, + '001' AS sip2_media_type, + FALSE AS magnetic_media + FROM staging_items + WHERE item_type NOT IN (SELECT code FROM config.circ_modifier); + +-- Import call numbers for bibrecord->library mappings +INSERT INTO asset.call_number (creator,editor,record,label,owning_lib) + SELECT DISTINCT 1, 1, l.bibkey , l.callnum, ou.id + FROM staging_items l + JOIN biblio.record_entry b ON (l.bibkey = b.id) + JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname); + +-- Import base copy data +INSERT INTO asset.copy ( + circ_lib, creator, editor, create_date, barcode, + status, location, loan_duration, + fine_level, circ_modifier, deposit, ref, call_number) + SELECT DISTINCT ou.id AS circ_lib, + 1 AS creator, + 1 AS editor, + l.createdate AS create_date, + l.barcode AS barcode, + CASE + WHEN l.location = 'BINDERY' THEN 2 + WHEN l.location = 'CATALOGING' THEN 11 + WHEN l.location = 'DISCARD' THEN 13 + WHEN l.location = 'ILL' THEN 10 + WHEN l.location = 'INPROCESS' THEN 5 + WHEN l.location = 'LOST' THEN 3 + WHEN l.location = 'LONGOVRDUE' THEN 4 + WHEN l.location = 'MISSING' THEN 4 + WHEN l.location = 'ON-ORDER' THEN 9 + WHEN l.location = 'REPAIR' THEN 14 + ELSE 0 + END AS status, + cl.id AS location, + 2 AS loan_duration, + 2 AS fine_level, + CASE + WHEN l.item_type IN ('REFERENCE', 'DEPOSIT_BK', 'BOOKS') THEN 'BOOK' + WHEN l.item_type IN ('PERIODICALS') THEN 'PERIODICAL' + ELSE l.item_type + END AS circ_modifier, + CASE + WHEN l.item_type = 'DEPOSIT_BK' THEN TRUE + ELSE FALSE + END AS deposit, + CASE + WHEN l.item_type = 'REFERENCE' THEN TRUE + ELSE FALSE + END AS ref, + cn.id AS call_number + FROM staging_items l + JOIN actor.org_unit ou + ON (l.owning_lib = ou.shortname) + JOIN 
asset.copy_location cl + ON (ou.id = cl.owning_lib AND l.location = cl.name) + JOIN asset.call_number cn + ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label) +; +COMMIT; diff --git a/tools/migration-scripts/lul_import.pl b/tools/migration-scripts/lul_import.pl new file mode 100644 index 0000000000..15857c7f9f --- /dev/null +++ b/tools/migration-scripts/lul_import.pl @@ -0,0 +1,94 @@ +#!/usr/bin/perl +use warnings; +use strict; + +=head1 Automated processing of Laurentian system bibliographic records + +First we split the records into many smaller files (defined by $chunksize), +then process a fraction of the records (defined by 1/$denominator) from +across the set of files. + +=cut + +my $chunk_size = 1000; +my $chunk_dir = "chunks"; +my $chunk_prefix = 'chunk_'; +my $denominator = 20; + +my $marc = 'bibs.marc'; +my $xml_file = 'bibs.xml'; +my $bre_file = 'bibs.bre'; +my $ingest_file = 'bibs.ingest'; +my $loader_prefix = 'lul_load'; +my $tcn_dumpfile = 'tcn.dump'; +my $script_dir = '/home/lauadmin/Evergreen/Open-ILS/src/extras/import/'; +my $db_user = 'evergreen'; +my $db_pw = ''; +my $db_host = ''; +my $db_name = 'evergreen'; + +my $input_files; + +convert_to_marcxml("true"); +process_bibs("--tcnfield 935 --tcnsubfield a --idfield 935 --idsubfield a --trash 901 --trash 949"); + +=over + +=item convert_to_marcxml(is_marc8) + +=back + +=cut +sub convert_to_marcxml { + my $is_marc8 = shift; + my $marc8_to_utf8; + +=over + +Split the files up into chunks + +=back + +=cut + if (!-d $chunk_dir) { + mkdir($chunk_dir); + } + + system("yaz-marcdump -C $chunk_size -s chunks/$chunk_prefix $marc > /dev/null"); + +=over + +Convert selected sample of records to MARC21XML + +=back + +=cut + my @files = glob("$chunk_dir/$chunk_prefix\*"); + + foreach my $i (1..$denominator) { + my $filenumber = sprintf("%07i", (scalar(@files) / $denominator * $i) - 1); + $input_files .= "$chunk_dir/$chunk_prefix$filenumber "; + } + if ($is_marc8) { + $marc8_to_utf8 = '-f MARC-8 -t UTF-8 -l 9=97'; + } + system("yaz-marcdump -i marc -o marcxml $marc8_to_utf8 $input_files > $xml_file"); +} + +=over + +=item process_bibs(marc2bre_options) + +Starting with a set of MARC21XML records, these commands generate a set of +SQL files suitable for loading into an Evergreen system. 
+ +=back + +=cut +sub process_bibs { + my $marc2bre_options = shift; + + system("perl $script_dir/marc2bre.pl --marctype XML --tcn_dumpfile $tcn_dumpfile --db_user $db_user --db_host $db_host --db_pw $db_pw --db_name $db_name $marc2bre_options $xml_file > $bre_file 2> marc2bre.err"); + system("perl $script_dir/direct_ingest.pl $bre_file > $ingest_file 2> ingest.err"); + system("perl $script_dir/parallel_pg_loader.pl --output $loader_prefix -or bre -or mrd -or mfr -or mtfe -or mafe -or msfe -or mkfe -or msefe -a mrd -a mfr -a mtfe -a mafe -a msfe -a mkfe -a msefe < $ingest_file 2> loader.err"); +} diff --git a/tools/migration-scripts/org_units.sql b/tools/migration-scripts/org_units.sql new file mode 100644 index 0000000000..7bd3a21abf --- /dev/null +++ b/tools/migration-scripts/org_units.sql @@ -0,0 +1,79 @@ +-- +-- PostgreSQL database dump +-- + +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = off; +SET check_function_bodies = false; +SET client_min_messages = warning; +SET escape_string_warning = off; + +SET search_path = actor, pg_catalog; + +-- +-- Name: org_unit_type_id_seq; Type: SEQUENCE SET; Schema: actor; Owner: evergreen +-- + +SELECT pg_catalog.setval('org_unit_type_id_seq', 100, true); + + +-- +-- Data for Name: org_unit; Type: TABLE DATA; Schema: actor; Owner: evergreen +-- + +COPY org_unit (id, parent_ou, ou_type, ill_address, holds_address, mailing_address, billing_address, shortname, name, email, phone, opac_visible) FROM stdin; +115 102 3 \N \N \N \N HRSRH HRSRH Health Sciences Library t +123 102 3 \N \N \N \N NEMHC Northeast Mental Health Centre \N \N t +105 102 6 1 1 1 1 LUSYS Laurentian University t +108 105 7 \N \N \N \N MEDIACEN Instructional Media Centre t +103 105 7 1 1 1 1 OSUL J.N. Desmarais Library t +107 105 7 \N \N \N \N SUDBURY University of Sudbury t +104 105 7 1 1 1 1 HUNTINGTON Huntington College Library t +117 105 7 \N \N \N \N MEDB Mining and the Environment Database t +1 \N 1 1 1 1 1 CONIFER Conifer t +102 1 4 1 1 1 1 LAURSYS Laurentian System t +106 1 2 \N \N \N \N WINDSYS Windsor System t +109 106 3 \N \N \N \N OWA Leddy Library t +112 102 3 \N \N \N \N ARTGALL Art Gallery of Sudbury t +113 102 3 \N \N \N \N CFOF Centre Franco-Ontarien de Folklore t +116 102 3 \N \N \N \N SAH Sault Area Hospital t +118 102 3 \N \N \N \N MNDM Mines Library, Willet Green Miller Centre t +119 102 3 \N \N \N \N XSTRATA Xstrata Process Support Centre Library t +120 102 3 \N \N \N \N VALE Vale Inco t +122 106 3 \N \N \N \N OWAL Paul Martin Law Library t +111 1 2 \N \N \N \N ALGOMASYS Algoma System \N \N t +124 111 3 \N \N \N \N OSTMA Algoma University, Wishart Library t +125 1 2 \N \N \N \N OSM NOHIN \N \N t +126 125 3 \N \N \N \N NOSMW Northern Ontario School of Medicine (West) \N \N t +110 125 3 \N \N \N \N NOSME Northern Ontario School of Medicine (East) t +114 102 6 \N \N \N \N HEARSTSYS Université de Hearst t +127 114 7 \N \N \N \N HEARST Hearst, Bibliothèque Maurice-Saulnier \N \N t +128 114 7 \N \N \N \N KAP Hearst Kapuskasing, Centre de Ressources \N \N t +129 114 7 \N \N \N \N TIMMINS Hearst Timmins, Centre de Ressources \N \N t +130 105 7 \N \N \N \N CRC Curriculum Resource Centre \N \N t +131 105 7 \N \N \N \N MRC Music Resource Centre \N \N t +132 105 7 \N \N \N \N LDCR Laboratoire de didactiques, E.S.E. \N \N t +\. 
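+
+-- Handy once the restore completes, added for illustration (not part of the
+-- original dump): list every org_unit with its type so the loaded hierarchy
+-- can be eyeballed against the tree above.
+--
+--   SELECT ou.id, ou.shortname, ot.name AS ou_type, ou.parent_ou
+--     FROM actor.org_unit ou
+--     JOIN actor.org_unit_type ot ON (ou.ou_type = ot.id)
+--     ORDER BY ou.id;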
+
+
+
+--
+-- Data for Name: org_unit_type; Type: TABLE DATA; Schema: actor; Owner: evergreen
+--
+
+COPY org_unit_type (id, name, opac_label, depth, parent, can_have_vols, can_have_users) FROM stdin;
+1 Consortium Everywhere 0 \N f f
+2 System University Libraries 1 1 f f
+4 LU System University and Partners 1 1 f f
+5 Bookmobile Your Bookmobile 3 3 t t
+7 University-Library University Library 3 6 t t
+3 Branch This Branch 2 2 t t
+6 University Campus Libraries 2 2 t t
+\.
+
+
+--
+-- PostgreSQL database dump complete
+--
+
+SELECT SETVAL('actor.org_unit_id_seq', (SELECT MAX(id) FROM actor.org_unit));
diff --git a/tools/migration-scripts/patron_groups.sql b/tools/migration-scripts/patron_groups.sql
new file mode 100644
index 0000000000..a8bf17bf55
--- /dev/null
+++ b/tools/migration-scripts/patron_groups.sql
@@ -0,0 +1,58 @@
+BEGIN;
+
+-- Set up patron groups and permissions
+
+INSERT INTO permission.grp_tree (name, parent, usergroup, perm_interval, description, application_perm) VALUES ('Faculty', 2, 't', '1 year', 'Faculty', 'group_application.user.patron');
+INSERT INTO permission.grp_tree (name, parent, usergroup, perm_interval, description, application_perm) VALUES ('Graduate', 2, 't', '1 year', 'Graduate', 'group_application.user.patron');
+INSERT INTO permission.grp_tree (name, parent, usergroup, perm_interval, description, application_perm) VALUES ('Undergraduate', 2, 't', '1 year', 'Undergraduate', 'group_application.user.patron');
+INSERT INTO permission.grp_tree (name, parent, usergroup, perm_interval, description, application_perm) VALUES ('Readers', 2, 't', '1 year', 'Readers', 'group_application.user.patron');
+INSERT INTO permission.grp_tree (name, parent, usergroup, perm_interval, description, application_perm) VALUES ('Staff members', 2, 't', '1 year', 'Staff members', 'group_application.user.patron');
+
+-- (11 = Faculty, 12 = Graduate, 13 = Undergraduate, 14 = Reader, 15 = Staff members)
+
+-- Not really necessary, unless you want to restrict the ability to add
+-- users to or remove them from each of these groups
+INSERT INTO permission.perm_list (code, description) VALUES
+    ('group_application.user.patron.faculty', 'Allow a user to add/remove users to/from the "Faculty" group'),
+    ('group_application.user.patron.grad', 'Allow a user to add/remove users to/from the "Graduate students" group'),
+    ('group_application.user.patron.undergrad', 'Allow a user to add/remove users to/from the "Undergraduate students" group'),
+    ('group_application.user.patron.reader', 'Allow a user to add/remove users to/from the "Readers" group'),
+    ('group_application.user.patron.staff', 'Allow a user to add/remove users to/from the "Staff members" group')
+;
+
+-- Give circulators the ability to abort transits
+-- ABORT_TRANSIT = perm 111, ABORT_REMOTE_TRANSIT = perm 112
+INSERT INTO permission.grp_perm_map (grp, perm, depth)
+    VALUES (5, 111, 2), (5, 112, 2);
+
+-- Enable all staff to cancel holds
+INSERT INTO permission.grp_perm_map (grp, perm, depth)
+    VALUES (3, 114, 0);
+
+-- Enable cataloguers to import bib records
+-- Cataloguer profile group = 4
+-- Depth (how much of the org_tree does the user have this permission over) = 0 (the whole thing)
+INSERT INTO permission.grp_perm_map (grp, depth, perm)
+    SELECT 4, 0, id from permission.perm_list
+    WHERE code LIKE '%IMPORT%' AND id NOT IN (
+        SELECT perm FROM permission.grp_perm_map
+        WHERE grp = 4
+    )
+;
+
+-- Grant ability to merge bib records to cataloguers
+INSERT INTO permission.grp_perm_map (grp, perm, depth)
+    VALUES (4, 230, 0);
+
+-- Grant ability to delete bib records to cataloguers
+INSERT INTO permission.grp_perm_map (grp, perm, depth) + VALUES (4, 153, 0); + +-- Grant ability to add, delete, or update closing dates +INSERT INTO permission.grp_perm_map (grp, perm, depth) + VALUES (10, 117, 1), (10, 116, 1), (10, 118, 1); + +-- Restrict visibility of patrons to staff in related institutions +UPDATE permission.grp_perm_map SET depth = 2 + WHERE grp = 3 AND perm = 31; + +COMMIT; diff --git a/tools/migration-scripts/sip_user.sql b/tools/migration-scripts/sip_user.sql new file mode 100644 index 0000000000..9b3759238f --- /dev/null +++ b/tools/migration-scripts/sip_user.sql @@ -0,0 +1,29 @@ +-- Basic SQL for setting up SIP client user group and associated permissions +BEGIN; + +-- Create the SIP user group +INSERT INTO permission.grp_tree (name, parent, usergroup, perm_interval, description, application_perm) + VALUES ('SIP-Client', 1, 't', '3 years', 'SIP server interactions', 'group_application.user.sip_client'); + +-- Create the permissions for the SIP user group +INSERT INTO permission.grp_perm_map (grp, depth, perm) + SELECT currval('permission.grp_tree_id_seq'), 0, id + FROM permission.perm_list + WHERE code IN ( + 'COPY_CHECKIN', + 'COPY_CHECKOUT', + 'RENEW_CIRC', + 'VIEW_CIRCULATIONS', + 'VIEW_COPY_CHECKOUT_HISTORY', + 'VIEW_PERMIT_CHECKOUT', + 'VIEW_USER', + 'VIEW_USER_FINES_SUMMARY', + 'VIEW_USER_TRANSACTIONS' + ) +; + +-- Give local system administrators (10) the permission (135) to create a SIP-Client user +INSERT INTO permission.grp_perm_map (grp, perm, depth) + VALUES (10, 135, 0); + +COMMIT; diff --git a/tools/migration-scripts/windsor_import.pl b/tools/migration-scripts/windsor_import.pl new file mode 100644 index 0000000000..f6561718f0 --- /dev/null +++ b/tools/migration-scripts/windsor_import.pl @@ -0,0 +1,94 @@ +#!/usr/bin/perl +use warnings; +use strict; + +=head1 Automated processing of Windsor system bibliographic records + +First we split the records into many smaller files (defined by $chunksize), +then process a fraction of the records (defined by 1/$denominator) from +across the set of files. 
+
+=cut
+
+my $chunk_size = 1000;
+my $chunk_dir = "chunks";
+my $chunk_prefix = 'chunk_';
+my $denominator = 20;
+
+my $marc = 'windsor_bibs.marc';
+my $xml_file = 'windsor_bibs.xml';
+my $bre_file = 'bibs.bre';
+my $ingest_file = 'bibs.ingest';
+my $loader_prefix = 'windsor_load';
+my $tcn_dumpfile = 'tcn.dump';
+my $script_dir = '/home/lauadmin/Evergreen-trunk/Open-ILS/src/extras/import/';
+my $db_user = 'evergreen';
+my $db_pw = '';
+my $db_host = '';
+my $db_name = '';
+
+my $input_files;
+
+# NOTE: a bare string like "false" is truthy in Perl, so pass 0 to mean
+# "these records are not MARC-8; skip the MARC-8 to UTF-8 conversion"
+convert_to_marcxml(0);
+process_bibs("--tcnfield 001 --idfield 001");
+
+=over
+
+=item convert_to_marcxml(is_marc8)
+
+=back
+
+=cut
+sub convert_to_marcxml {
+    my $is_marc8 = shift;
+    my $marc8_to_utf8 = '';
+
+=over
+
+Split the files up into chunks
+
+=back
+
+=cut
+    if (!-d $chunk_dir) {
+        mkdir($chunk_dir);
+    }
+
+    system("yaz-marcdump -C $chunk_size -s chunks/$chunk_prefix $marc > /dev/null");
+
+=over
+
+Convert selected sample of records to MARC21XML
+
+=back
+
+=cut
+    my @files = glob("$chunk_dir/$chunk_prefix\*");
+
+    foreach my $i (1..$denominator) {
+        my $filenumber = sprintf("%07i", (scalar(@files) / $denominator * $i) - 1);
+        $input_files .= "$chunk_dir/$chunk_prefix$filenumber ";
+    }
+    if ($is_marc8) {
+        $marc8_to_utf8 = '-f MARC-8 -t UTF-8 -l 9=97';
+    }
+    system("yaz-marcdump -i marc -o marcxml $marc8_to_utf8 $input_files > $xml_file");
+}
+
+=over
+
+=item process_bibs(marc2bre_options)
+
+Starting with a set of MARC21XML records, these commands generate a set of
+SQL files suitable for loading into an Evergreen system.
+
+=back
+
+=cut
+sub process_bibs {
+    my $marc2bre_options = shift;
+
+    system("perl $script_dir/windsor_marc2bre.pl --marctype XML --tcn_dumpfile $tcn_dumpfile --db_user $db_user --db_host $db_host --db_pw $db_pw --db_name $db_name $marc2bre_options $xml_file > $bre_file 2> marc2bre.err");
+    system("perl $script_dir/direct_ingest.pl $bre_file > $ingest_file 2> ingest.err");
+    system("perl $script_dir/parallel_pg_loader.pl --output $loader_prefix -or bre -or mrd -or mfr -or mtfe -or mafe -or msfe -or mkfe -or msefe -a mrd -a mfr -a mtfe -a mafe -a msfe -a mkfe -a msefe < $ingest_file 2> loader.err");
+}
diff --git a/tools/migration-scripts/windsor_patron_load_base.csv b/tools/migration-scripts/windsor_patron_load_base.csv
new file mode 100644
index 0000000000..978a591c27
--- /dev/null
+++ b/tools/migration-scripts/windsor_patron_load_base.csv
@@ -0,0 +1 @@
+1 FAC dbs@example.com 12345000012345 1 Other 12345000012345 \N DAN B SCOTT \N 555-5555 x.
\N \N Must have Card Leddy t f f 9-Jun-97 30-Apr-11 diff --git a/tools/migration-scripts/windsor_patrons.sql b/tools/migration-scripts/windsor_patrons.sql new file mode 100644 index 0000000000..46ca2a662e --- /dev/null +++ b/tools/migration-scripts/windsor_patrons.sql @@ -0,0 +1,260 @@ +DROP TABLE staging_patron; +DROP TABLE staging_patron_address; +DROP TABLE staging_barcode; +DROP TABLE staging_note; +DROP TABLE staging_mobile_phone; +DROP TABLE staging_other_phone; + +BEGIN; + +CREATE TABLE staging_patron (idfield SERIAL NOT NULL, username TEXT, profile TEXT, identity_type TEXT, password TEXT, standing INTEGER, identity_type2 TEXT, identity_value TEXT, name_prefix TEXT, first_given_name TEXT, second_given_name TEXT, family_name TEXT, name_suffix TEXT, day_phone TEXT, evening_phone TEXT, other_phone TEXT, alert_message TEXT, home_library TEXT, active BOOLEAN, barred BOOLEAN, deleted BOOLEAN, create_date DATE, expire_date DATE); +CREATE TABLE staging_patron_address (idfield SERIAL NOT NULL, barcode TEXT, address_type TEXT, street_1 TEXT, street_2 TEXT, city TEXT, county TEXT, province TEXT, country TEXT, postal_code TEXT); +CREATE TABLE staging_barcode (idfield SERIAL NOT NULL, barcode TEXT, old_barcode TEXT, active BOOLEAN); +CREATE TABLE staging_note (idfield SERIAL NOT NULL, barcode TEXT, create_date TEXT, publicly_visible BOOLEAN, title TEXT, note TEXT, create_date2 TEXT); +CREATE TABLE staging_mobile_phone (idfield SERIAL NOT NULL, barcode TEXT, phone TEXT); +CREATE TABLE staging_other_phone (idfield SERIAL NOT NULL, barcode TEXT, phone TEXT); + +COPY staging_patron (username, profile, identity_type, password, standing, identity_type2, identity_value, name_prefix, first_given_name, second_given_name, family_name, name_suffix, day_phone, evening_phone, other_phone, alert_message, home_library, active, barred, deleted, create_date, expire_date) FROM '/home/dbs/conifer/windsor_patron_load_base.csv'; +COPY staging_patron_address (barcode, address_type, street_1, street_2, city, county, province, country, postal_code) FROM '/home/dbs/conifer/windsor_patron_load_addresses.csv'; +COPY staging_barcode (barcode, old_barcode, active) FROM '/home/dbs/conifer/windsor_patron_load_barcodes.csv'; +COPY staging_note (barcode, create_date, publicly_visible, title, note, create_date2) FROM '/home/dbs/conifer/windsor_patron_load_notes.csv'; +COPY staging_mobile_phone (barcode, phone) FROM '/home/dbs/conifer/windsor_patron_load_phones_mobile.csv'; +COPY staging_other_phone (barcode, phone) FROM '/home/dbs/conifer/windsor_patron_load_phones_other.csv'; + +COMMIT; + +---- Let's find our duplicate usernames +--SELECT trim(both from username), identity_value, COUNT(username) as ucount +--FROM staging_patron +--GROUP BY username, identity_value +--HAVING COUNT(username) > 1 +--ORDER BY ucount DESC; +-- +---- Now let's find our duplicate barcodes +--SELECT trim(both from username), identity_value, COUNT(identity_value) as ucount +--FROM staging_patron +--GROUP BY username, identity_value +--HAVING COUNT(identity_value) > 1 +--ORDER BY ucount DESC; +-- +---- Get the distinct set of values for dupe usernames +---- including active/barred/deleted status, just in case +--SELECT DISTINCT trim(both from username), identity_value, active, barred, deleted +--FROM staging_patron +--WHERE username IN ( +-- SELECT username +-- FROM staging_patron +-- GROUP BY username +-- HAVING count(username) > 1 +--); +-- +---- Do the barcodes for dupe usernames exist over in the staging_barcode table? 
+--SELECT DISTINCT TRIM(p.username) AS uname, p.identity_value, +-- CASE +-- WHEN p.identity_value IN (SELECT barcode FROM staging_barcode WHERE barcode = p.identity_value AND active = 't') THEN 'active new' +-- WHEN p.identity_value IN (SELECT barcode FROM staging_barcode WHERE old_barcode = p.identity_value AND active = 't') THEN 'active old' +-- WHEN p.identity_value IN (SELECT barcode FROM staging_barcode WHERE barcode = p.identity_value AND active = 'f') THEN 'inactive new' +-- WHEN p.identity_value IN (SELECT barcode FROM staging_barcode WHERE old_barcode = p.identity_value AND active = 'f') THEN 'inactive old' +-- ELSE 'not found' +-- END AS barcode_state +-- FROM staging_patron p +-- WHERE p.identity_value IN ( +-- SELECT DISTINCT identity_value +-- FROM staging_patron +-- WHERE username IN ( +-- SELECT username +-- FROM staging_patron +-- GROUP BY username +-- HAVING COUNT(username) > 1 +-- ) +-- ) +-- ORDER BY uname +--; +-- +-- Get rid of the username dupes in a savage manner; last one entered wins +DELETE FROM staging_patron + WHERE idfield NOT IN ( + SELECT MAX(dt.idfield) + + FROM staging_patron dt + GROUP BY dt.username + ) +; + +DELETE FROM staging_patron + WHERE idfield NOT IN ( + SELECT MAX(dt.idfield) + + FROM staging_patron dt + GROUP BY dt.identity_value + ) +; + +-- And get rid of duplicate (old) barcodes +DELETE FROM staging_barcode + WHERE idfield NOT IN ( + SELECT MAX(dt.idfield) + + FROM staging_barcode dt + GROUP BY dt.old_barcode + ) +; + +SELECT COUNT(*) FROM staging_patron; + +BEGIN; + +INSERT INTO actor.usr (profile, usrname, passwd, standing, ident_type, ident_value, first_given_name, second_given_name, family_name, day_phone, home_ou, active, barred, deleted, alert_message, create_date, expire_date) + SELECT DISTINCT + CASE + -- Faculty + WHEN trim(both from patron.profile) IN ('AFAC', 'FAC', 'LAW FACLTY') THEN 11 + -- Graduate student + WHEN trim(both from patron.profile) IN ('AGRAD', 'GRAD') THEN 12 + -- Undergraduate student + WHEN trim(both from patron.profile) IN ('AUND', 'UND') THEN 13 + -- Readers (obviously need to map these to something better + WHEN trim(both from patron.profile) = 'DIRB' THEN 14 + WHEN trim(both from patron.profile) = 'EXAL' THEN 14 + WHEN trim(both from patron.profile) = 'EXEC' THEN 14 + WHEN trim(both from patron.profile) = 'EXOT' THEN 14 + WHEN trim(both from patron.profile) = 'ILL' THEN 14 + WHEN trim(both from patron.profile) = 'LAW1' THEN 14 + WHEN trim(both from patron.profile) = 'LAW2' THEN 14 + WHEN trim(both from patron.profile) = 'LAW3' THEN 14 + WHEN trim(both from patron.profile) = 'LAW COUR' THEN 14 + WHEN trim(both from patron.profile) = 'LAW DAY365' THEN 14 + WHEN trim(both from patron.profile) = 'LAW KEY2' THEN 14 + WHEN trim(both from patron.profile) = 'STAF' THEN 14 + WHEN trim(both from patron.profile) IS NULL THEN 14 + ELSE 14 + END AS profile, + trim(both from patron.username) AS usrname, + trim(both from patron.password) AS passwd, + CASE + WHEN patron.standing = 0 THEN 2 -- interpreted as "Barred"? 
+        ELSE 1
+    END AS standing,
+    CASE
+        WHEN patron.identity_type = 'Other' THEN 3
+    END AS ident_type,
+    trim(both from patron.identity_value) AS ident_value,
+    trim(both from patron.first_given_name) AS first_given_name,
+    CASE
+        WHEN trim(both from patron.second_given_name) != '' THEN patron.second_given_name
+        ELSE NULL
+    END AS second_given_name,
+    trim(both from patron.family_name) AS family_name,
+    CASE
+        WHEN trim(both from patron.day_phone) != '' THEN patron.day_phone
+        ELSE NULL
+    END AS day_phone,
+    CASE
+        WHEN trim(both from patron.home_library) = 'Leddy' THEN 109
+        WHEN trim(both from patron.home_library) = 'Law' THEN 122
+        ELSE 109
+    END AS home_ou,
+    patron.active as active,
+    patron.barred as barred,
+    patron.deleted as deleted,
+    CASE
+        WHEN trim(both from patron.alert_message) != '' THEN patron.alert_message
+        ELSE NULL
+    END AS alert_message,
+    patron.create_date::DATE as create_date,
+    patron.expire_date::DATE as expire_date
+    FROM staging_patron patron
+;
+
+-- And add our mobile and other phones to the patron record
+-- Mapping Windsor's "other" to evening_phone in Evergreen
+UPDATE actor.usr au
+    SET evening_phone = TRIM(sop.phone)
+    FROM staging_other_phone sop
+    WHERE sop.barcode = au.ident_value
+;
+
+-- Mapping Windsor's "mobile" to other_phone in Evergreen
+-- (day_phone was already populated from the staging table above)
+UPDATE actor.usr au
+    SET other_phone = TRIM(smp.phone)
+    FROM staging_mobile_phone smp
+    WHERE smp.barcode = au.ident_value
+;
+
+-- Now we need to generate actor.card entries
+-- And be careful to distinguish Windsor vs. existing entries
+INSERT INTO actor.card (usr, barcode, active)
+    SELECT au.id, au.ident_value, 't'
+    FROM actor.usr au
+    WHERE au.ident_value IS NOT NULL
+        AND au.home_ou IN (109, 122)
+        AND au.ident_value NOT IN (SELECT barcode FROM actor.card)
+        AND au.id > 1
+;
+
+UPDATE actor.usr au SET card = ac.id
+    FROM actor.card ac
+    WHERE ac.barcode = au.ident_value
+        AND au.card IS NULL
+        AND au.id > 1
+;
+
+-- Get rid of the "old" barcodes that we inserted into actor.usr
+DELETE FROM staging_barcode
+    WHERE old_barcode IN (SELECT barcode FROM actor.card);
+
+INSERT INTO actor.card (usr, barcode, active)
+    SELECT au.id, sb.old_barcode, sb.active
+    FROM staging_barcode sb
+        INNER JOIN actor.usr au
+            ON (sb.barcode = au.ident_value AND au.id > 1)
+    WHERE sb.old_barcode IS NOT NULL
+        AND sb.old_barcode NOT IN (SELECT barcode FROM actor.card)
+;
+
+UPDATE staging_patron_address
+    SET county = NULL
+    WHERE TRIM(county) = '';
+
+UPDATE staging_patron_address
+    SET address_type = NULL
+    WHERE TRIM(address_type) = ''
+;
+
+INSERT INTO actor.usr_address (usr, address_type, street1, street2, city, state, county, country, post_code)
+    SELECT DISTINCT
+        ac.usr,
+        TRIM(sa.address_type),
+        TRIM(sa.street_1),
+        TRIM(sa.street_2),
+        TRIM(sa.city),
+        TRIM(sa.province),
+        TRIM(sa.county),
+        TRIM(sa.country),
+        TRIM(sa.postal_code)
+    FROM staging_patron_address sa
+        INNER JOIN actor.card ac ON (ac.barcode = sa.barcode)
+;
+
+-- The staging dates arrive in forms like 9-Jun-97, so tell PostgreSQL to
+-- read them day-first
+SET DateStyle TO 'DMY';
+
+INSERT INTO actor.usr_note (usr, creator, create_date, pub, title, value)
+    SELECT
+        ac.usr,
+        1, -- We don't have the real creator in the staging table, so make it admin
+        CASE
+            WHEN TRIM(create_date) != '' THEN sn.create_date::DATE
+            ELSE '01-May-00'::DATE
+        END AS create_date,
+        sn.publicly_visible,
+        TRIM(sn.title),
+        TRIM(sn.note)
+    FROM staging_note sn
+        INNER JOIN actor.card ac ON (ac.barcode = sn.barcode)
+;
+
+--COMMIT;
+--ROLLBACK;
+
+-- vim: et:ts=4:sw=4:
-- 
2.11.0