From 67f075cd83555443d605061796c81aa40b15b87e Mon Sep 17 00:00:00 2001
From: dbs
Date: Fri, 6 Mar 2009 17:25:22 +0000
Subject: [PATCH] Commit some scripts and custom SQL that we're using for migration and testing

git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/trunk@141 6d9bc8c9-1ec2-4278-b937-99fde70a366f
---
 migration-scripts/create_test_users.pl |   68 ++
 migration-scripts/demousers.sql        | 1124 ++++++++++++++++++++++++++++++++
 migration-scripts/generate_copies.sql  |  154 +++++
 migration-scripts/lul_import.pl        |   94 +++
 migration-scripts/org_units.sql        |   79 +++
 migration-scripts/windsor_import.pl    |   94 +++
 6 files changed, 1613 insertions(+)
 create mode 100644 migration-scripts/create_test_users.pl
 create mode 100644 migration-scripts/demousers.sql
 create mode 100644 migration-scripts/generate_copies.sql
 create mode 100644 migration-scripts/lul_import.pl
 create mode 100644 migration-scripts/org_units.sql
 create mode 100644 migration-scripts/windsor_import.pl

diff --git a/migration-scripts/create_test_users.pl b/migration-scripts/create_test_users.pl
new file mode 100644
index 0000000000..81fce98afe
--- /dev/null
+++ b/migration-scripts/create_test_users.pl
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+=head1 Generate a set of staff and users for testing Evergreen
+
+=over
+
+=item get_org_unit()
+
+Map user IDs to org_units for permissions
+
+=back
+
+=cut
+
+sub get_org_unit {
+    my $id = shift;
+    if ($id <= 10) {
+        return 105; # OSUL
+    } elsif ($id <= 20) {
+        return 106; # WINDSYS
+    } elsif ($id <= 30) {
+        return 111; # ALGOMASYS
+    } elsif ($id <= 40) {
+        return 125; # NOHIN
+    }
+}
+
+my $password = 'demo123';
+
+my @profiles = ( 1, 4, 5, 10 );
+
+my $profile_info = {
+    '1' => 'user',
+    '4' => 'cat',
+    '5' => 'circ',
+    '10' => 'admin',
+};
+
+my $sql = "BEGIN;\n";
+
+foreach my $i (1..40) {
+    foreach my $profile (@profiles) {
+        my $twodigit = sprintf('%02d', $i);
+        my $barcode = '1' . ($profile == 10 ? '0' : $profile) . $twodigit;
+        my $usrname = $profile_info->{$profile . ""} . $i;
+        my $family_name = ucfirst($usrname);
+        my $org_unit = get_org_unit($i);
+        $sql .= <library mappings
+INSERT INTO asset.call_number (creator,editor,record,label,owning_lib)
+    SELECT DISTINCT 1, 1, l.bibkey, l.callnum, ou.id
+      FROM staging_items l
+        JOIN biblio.record_entry b ON (l.bibkey = b.id)
+        JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname);
+
+-- Import base copy data
+INSERT INTO asset.copy (
+    circ_lib, creator, editor, create_date, barcode,
+    status, location, loan_duration,
+    fine_level, circ_modifier, deposit, ref, call_number)
+    SELECT DISTINCT ou.id AS circ_lib,
+        1 AS creator,
+        1 AS editor,
+        l.createdate AS create_date,
+        l.barcode AS barcode,
+        CASE
+            WHEN l.location = 'BINDERY' THEN 2
+            WHEN l.location = 'CATALOGING' THEN 11
+            WHEN l.location = 'DISCARD' THEN 13
+            WHEN l.location = 'ILL' THEN 10
+            WHEN l.location = 'INPROCESS' THEN 5
+            WHEN l.location = 'LOST' THEN 3
+            WHEN l.location = 'LONGOVRDUE' THEN 4
+            WHEN l.location = 'MISSING' THEN 4
+            WHEN l.location = 'ON-ORDER' THEN 9
+            WHEN l.location = 'REPAIR' THEN 14
+            ELSE 0
+        END AS status,
+        cl.id AS location,
+        2 AS loan_duration,
+        2 AS fine_level,
+        CASE
+            WHEN l.item_type IN ('REFERENCE', 'DEPOSIT_BK', 'BOOKS') THEN 'BOOK'
+            WHEN l.item_type IN ('PERIODICALS') THEN 'PERIODICAL'
+            ELSE l.item_type
+        END AS circ_modifier,
+        CASE
+            WHEN l.item_type = 'DEPOSIT_BK' THEN TRUE
+            ELSE FALSE
+        END AS deposit,
+        CASE
+            WHEN l.item_type = 'REFERENCE' THEN TRUE
+            ELSE FALSE
+        END AS ref,
+        cn.id AS call_number
+    FROM staging_items l
+        JOIN actor.org_unit ou
+            ON (l.owning_lib = ou.shortname)
+        JOIN asset.copy_location cl
+            ON (ou.id = cl.owning_lib AND l.location = cl.name)
+        JOIN asset.call_number cn
+            ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label)
+;
+COMMIT;
diff --git a/migration-scripts/lul_import.pl b/migration-scripts/lul_import.pl
new file mode 100644
index 0000000000..15857c7f9f
--- /dev/null
+++ b/migration-scripts/lul_import.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+
+=head1 Automated processing of Laurentian system bibliographic records
+
+First we split the records into many smaller files (defined by $chunk_size),
+then process a fraction of the records (defined by 1/$denominator) from
+across the set of files.
+
+=cut
+
+my $chunk_size = 1000;
+my $chunk_dir = "chunks";
+my $chunk_prefix = 'chunk_';
+my $denominator = 20;
+
+my $marc = 'bibs.marc';
+my $xml_file = 'bibs.xml';
+my $bre_file = 'bibs.bre';
+my $ingest_file = 'bibs.ingest';
+my $loader_prefix = 'lul_load';
+my $tcn_dumpfile = 'tcn.dump';
+my $script_dir = '/home/lauadmin/Evergreen/Open-ILS/src/extras/import/';
+my $db_user = 'evergreen';
+my $db_pw = '';
+my $db_host = '';
+my $db_name = 'evergreen';
+
+my $input_files;
+
+convert_to_marcxml(1); # true: these records are MARC-8 and need conversion to UTF-8
+process_bibs("--tcnfield 935 --tcnsubfield a --idfield 935 --idsubfield a --trash 901 --trash 949");
+
+=over
+
+=item convert_to_marcxml(is_marc8)
+
+=back
+
+=cut
+sub convert_to_marcxml {
+    my $is_marc8 = shift;
+    my $marc8_to_utf8 = '';
+
+=over
+
+Split the files up into chunks
+
+=back
+
+=cut
+    if (!-d $chunk_dir) {
+        mkdir($chunk_dir);
+    }
+
+    system("yaz-marcdump -C $chunk_size -s $chunk_dir/$chunk_prefix $marc > /dev/null");
+
+=over
+
+Convert selected sample of records to MARC21XML
+
+=back
+
+=cut
+    my @files = glob("$chunk_dir/$chunk_prefix\*");
+
+    foreach my $i (1..$denominator) {
+        my $filenumber = sprintf("%07i", (scalar(@files) / $denominator * $i) - 1);
+        $input_files .= "$chunk_dir/$chunk_prefix$filenumber ";
+    }
+    if ($is_marc8) {
+        $marc8_to_utf8 = '-f MARC-8 -t UTF-8 -l 9=97';
+    }
+    system("yaz-marcdump -i marc -o marcxml $marc8_to_utf8 $input_files > $xml_file");
+}
+
+=over
+
+=item process_bibs(marc2bre_options)
+
+Starting with a set of MARC21XML records, these commands generate a set of
+SQL files suitable for loading into an Evergreen system.
+
+=back
+
+=cut
+sub process_bibs {
+    my $marc2bre_options = shift;
+
+    system("perl $script_dir/marc2bre.pl --marctype XML --tcn_dumpfile $tcn_dumpfile --db_user $db_user --db_host $db_host --db_pw $db_pw --db_name $db_name $marc2bre_options $xml_file > $bre_file 2> marc2bre.err");
+    system("perl $script_dir/direct_ingest.pl $bre_file > $ingest_file 2> ingest.err");
+    system("perl $script_dir/parallel_pg_loader.pl --output $loader_prefix -or bre -or mrd -or mfr -or mtfe -or mafe -or msfe -or mkfe -or msefe -a mrd -a mfr -a mtfe -a mafe -a msfe -a mkfe -a msefe < $ingest_file 2> loader.err");
+}
diff --git a/migration-scripts/org_units.sql b/migration-scripts/org_units.sql
new file mode 100644
index 0000000000..84cd232208
--- /dev/null
+++ b/migration-scripts/org_units.sql
@@ -0,0 +1,79 @@
+--
+-- PostgreSQL database dump
+--
+
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = off;
+SET check_function_bodies = false;
+SET client_min_messages = warning;
+SET escape_string_warning = off;
+
+SET search_path = actor, pg_catalog;
+
+--
+-- Name: org_unit_type_id_seq; Type: SEQUENCE SET; Schema: actor; Owner: evergreen
+--
+
+SELECT pg_catalog.setval('org_unit_type_id_seq', 100, true);
+
+
+--
+-- Data for Name: org_unit; Type: TABLE DATA; Schema: actor; Owner: evergreen
+--
+
+COPY org_unit (id, parent_ou, ou_type, ill_address, holds_address, mailing_address, billing_address, shortname, name, email, phone, opac_visible) FROM stdin;
+115 102 3 \N \N \N \N HRSRH HRSRH Health Sciences Library t
+123 102 3 \N \N \N \N NEMHC Northeast Mental Health Centre \N \N t
+105 102 6 1 1 1 1 LUSYS Laurentian University t
+108 105 7 \N \N \N \N MEDIACEN Instructional Media Centre t
+103 105 7 1 1 1 1 OSUL J.N. Desmarais Library t
+107 105 7 \N \N \N \N SUDBURY University of Sudbury t
+104 105 7 1 1 1 1 HUNTINGTON Huntington College Library t
+117 105 7 \N \N \N \N MEDB Mining and the Environment Database t
+1 \N 1 1 1 1 1 CONIFER Conifer t
+102 1 2 1 1 1 1 LAURSYS Laurentian System t
+106 1 2 \N \N \N \N WINDSYS Windsor System t
+109 106 3 \N \N \N \N LEDDY Leddy Library t
+112 102 3 \N \N \N \N ARTGALL Art Gallery of Sudbury t
+113 102 3 \N \N \N \N CFOF Centre Franco-Ontarien de Folklore t
+116 102 3 \N \N \N \N SAH Sault Area Hospital t
+118 102 3 \N \N \N \N MNDM Mines Library, Willet Green Miller Centre t
+119 102 3 \N \N \N \N FALCON Xstrata Process Support Centre Library, Falconbridge t
+120 102 3 \N \N \N \N INCO Vale Inco t
+122 106 3 \N \N \N \N WINDLAQ Paul Martin Law Library t
+111 1 2 \N \N \N \N ALGOMASYS Algoma System \N \N t
+124 111 3 \N \N \N \N ALGOMA Algoma University, Wishart Library t
+125 1 2 \N \N \N \N OSM NOHIN \N \N t
+126 125 3 \N \N \N \N NOSMW Northern Ontario School of Medicine (West) \N \N t
+110 125 3 \N \N \N \N NOSME Northern Ontario School of Medicine (East) t
+114 102 6 \N \N \N \N HEARSTSYS Université de Hearst t
+127 114 7 \N \N \N \N HEARST Hearst, Bibliothèque Maurice-Saulnier \N \N t
+128 114 7 \N \N \N \N KAP Hearst Kapuskasing, Centre de Ressources \N \N t
+129 114 7 \N \N \N \N TIMMINS Hearst Timmins, Centre de Ressources \N \N t
+130 105 7 \N \N \N \N LUEDUC School of Education, Curriculum Resource Centre \N \N t
+131 105 7 \N \N \N \N LUMUSIC School of Education, Music Resource Centre \N \N t
+132 105 7 \N \N \N \N EDUCATION Laboratoire de didactiques, E.S.E. \N \N t
+\.
+
+
+--
+-- Data for Name: org_unit_type; Type: TABLE DATA; Schema: actor; Owner: evergreen
+--
+
+COPY org_unit_type (id, name, opac_label, depth, parent, can_have_vols, can_have_users) FROM stdin;
+1 Consortium Everywhere 0 \N f f
+2 System Local Library System 1 1 f f
+4 Sub-lib This Specialized Library 3 3 t t
+5 Bookmobile Your Bookmobile 3 3 t t
+7 University-Library University Library 3 6 t t
+3 Branch This Branch 2 2 t t
+6 University University System 2 2 t t
+\.
+
+
+--
+-- PostgreSQL database dump complete
+--
+
+SELECT SETVAL('actor.org_unit_id_seq', (SELECT MAX(id) FROM actor.org_unit));
diff --git a/migration-scripts/windsor_import.pl b/migration-scripts/windsor_import.pl
new file mode 100644
index 0000000000..f6561718f0
--- /dev/null
+++ b/migration-scripts/windsor_import.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+
+=head1 Automated processing of Windsor system bibliographic records
+
+First we split the records into many smaller files (defined by $chunk_size),
+then process a fraction of the records (defined by 1/$denominator) from
+across the set of files.
+
+=cut
+
+my $chunk_size = 1000;
+my $chunk_dir = "chunks";
+my $chunk_prefix = 'chunk_';
+my $denominator = 20;
+
+my $marc = 'windsor_bibs.marc';
+my $xml_file = 'windsor_bibs.xml';
+my $bre_file = 'bibs.bre';
+my $ingest_file = 'bibs.ingest';
+my $loader_prefix = 'windsor_load';
+my $tcn_dumpfile = 'tcn.dump';
+my $script_dir = '/home/lauadmin/Evergreen-trunk/Open-ILS/src/extras/import/';
+my $db_user = 'evergreen';
+my $db_pw = '';
+my $db_host = '';
+my $db_name = '';
+
+my $input_files;
+
+convert_to_marcxml(0); # a false value: these records are not MARC-8 (the string "false" would be true in Perl)
+process_bibs("--tcnfield 001 --idfield 001");
+
+=over
+
+=item convert_to_marcxml(is_marc8)
+
+=back
+
+=cut
+sub convert_to_marcxml {
+    my $is_marc8 = shift;
+    my $marc8_to_utf8 = '';
+
+=over
+
+Split the files up into chunks
+
+=back
+
+=cut
+    if (!-d $chunk_dir) {
+        mkdir($chunk_dir);
+    }
+
+    system("yaz-marcdump -C $chunk_size -s $chunk_dir/$chunk_prefix $marc > /dev/null");
+
+=over
+
+Convert selected sample of records to MARC21XML
+
+=back
+
+=cut
+    my @files = glob("$chunk_dir/$chunk_prefix\*");
+
+    foreach my $i (1..$denominator) {
+        my $filenumber = sprintf("%07i", (scalar(@files) / $denominator * $i) - 1);
+        $input_files .= "$chunk_dir/$chunk_prefix$filenumber ";
+    }
+    if ($is_marc8) {
+        $marc8_to_utf8 = '-f MARC-8 -t UTF-8 -l 9=97';
+    }
+    system("yaz-marcdump -i marc -o marcxml $marc8_to_utf8 $input_files > $xml_file");
+}
+
+=over
+
+=item process_bibs(marc2bre_options)
+
+Starting with a set of MARC21XML records, these commands generate a set of
+SQL files suitable for loading into an Evergreen system.
+
+=back
+
+=cut
+sub process_bibs {
+    my $marc2bre_options = shift;
+
+    system("perl $script_dir/windsor_marc2bre.pl --marctype XML --tcn_dumpfile $tcn_dumpfile --db_user $db_user --db_host $db_host --db_pw $db_pw --db_name $db_name $marc2bre_options $xml_file > $bre_file 2> marc2bre.err");
+    system("perl $script_dir/direct_ingest.pl $bre_file > $ingest_file 2> ingest.err");
+    system("perl $script_dir/parallel_pg_loader.pl --output $loader_prefix -or bre -or mrd -or mfr -or mtfe -or mafe -or msfe -or mkfe -or msefe -a mrd -a mfr -a mtfe -a mafe -a msfe -a mkfe -a msefe < $ingest_file 2> loader.err");
+}
-- 
2.11.0
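Note on generate_copies.sql above: it reads from a staging_items table that this patch does not create. The sketch below is only an illustration of the shape that table would need, inferred from the columns the script references; the table definition and the column types are assumptions, not part of the actual Conifer migration.

-- Hypothetical staging table sketch (types assumed); the real migration load may differ.
CREATE TABLE staging_items (
    bibkey      BIGINT, -- bib record id; joined to biblio.record_entry.id and asset.call_number.record
    callnum     TEXT,   -- call number label
    owning_lib  TEXT,   -- org unit shortname; joined to actor.org_unit.shortname
    barcode     TEXT,   -- becomes asset.copy.barcode
    createdate  DATE,   -- becomes asset.copy.create_date
    location    TEXT,   -- copy location name; joined to asset.copy_location.name
    item_type   TEXT    -- e.g. REFERENCE, DEPOSIT_BK, BOOKS, PERIODICALS
);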