Add bib importing information provided by Michael Peters and Repke De Vries.
authorRobert Soulliere <rsoulliere@libdog.mohawkcollege.ca>
Tue, 1 Feb 2011 21:00:23 +0000 (16:00 -0500)
committerRobert Soulliere <rsoulliere@libdog.mohawkcollege.ca>
Tue, 1 Feb 2011 21:00:23 +0000 (16:00 -0500)
1.6/admin/migratingdata.xml

index eaa5105..af49df8 100644 (file)
@@ -14,8 +14,8 @@
                <indexterm><primary>migrating</primary><secondary>importing bibliographic records</secondary></indexterm>\r
                <para>\r
                One of the most important and challenging  tasks is migrating your bibliographic records to a new system. The procedure may be different depending on the system from which you \r
-               are migrating  and the content of the marc records exported from the existing system. The proecedures in this section deal with the process once the data from the existing system \r
-               is exporterd into marc records. It does not cover exporting data from your existing non-Evergreen system.</para>\r
+               are migrating  and the content of the marc records exported from the existing system. The procedures in this section deal with the process once the data from the existing system \r
+               is exported into marc records. It does not cover exporting data from your existing non-Evergreen system.</para>\r
                <para>Several tools for importing bibliographic records into Evergreen can be found in the Evergreen installation folder \r
                (<filename class="directory">/home/opensrf/Evergreen-ILS-1.6.1.6/Open-ILS/src/extras/import/</filename> ) and are also available from the Evergreen repository \r
                (<link xl:href="http://svn.open-ils.org/trac/ILS/browser/branches/rel_1_6_1/Open-ILS/src/extras/import" xl:title="import scripts - Evergreen repository">\r
@@ -123,45 +123,289 @@ http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
                        <para>Metarecords are required to place holds on items, among other actions.</para>\r
                </simplesect>   \r
        </section>\r
+\r
+\r
+\r
+<section xml:id="migrating_records_using_migration_tools">\r
+               <title>Migrating Bibliographic Records Using the ESI Migration Tools</title>\r
+               <indexterm><primary>migrating</primary><secondary>Migration Tools</secondary></indexterm>       \r
+               <para>The following procedure explains how to migrate bibliographic records from marc records into Evergreen. It does not cover exporting records from specific proprietary ILS \r
+               systems. For assistance with exporting records from your current system, please refer to the manuals for your system or ask for help from the <link linkend="more_info">Evergreen community</link>.</para>\r
+               \r
+               <procedure>\r
+                       <step>\r
+                               <para>Download the Evergreen <link xl:href="http://git.esilibrary.com/?p=migration-tools.git;a=summary" \r
+                               xl:title="Equinox migration utilities">migration utilities</link> from the git repository.</para>\r
+                               <para>Use the command <command>git clone git://git.esilibrary.com/git/migration-tools.git</command> to clone the migration tools.</para>\r
+                                <para>Install the migration tools:</para>\r
+<screen>\r
+<userinput>\r
+<![CDATA[\r
+cd migration-tools/Equinox-Migration\r
+perl Makefile.PL\r
+make\r
+make test\r
+make install\r
+]]>\r
+</userinput>\r
+</screen>\r
+</step>        \r
+<step>\r
+                               <para>Dump marc records into MARCXML using <systemitem>yaz-marcdump</systemitem>:</para>\r
+\r
+<screen>\r
+<userinput>\r
+<![CDATA[\r
+echo '<?xml version="1.0" encoding="UTF-8" ?>' > imported_marc_records.xml\r
+yaz-marcdump -f MARC-8 -t UTF-8 -o marcxml imported_marc_records.mrc >> imported_marc_records.xml\r
+]]>\r
+</userinput>\r
+</screen>\r
+</step>        \r
+<step>\r
+                               <para>Test the validity of the XML file using <systemitem>xmllint</systemitem>:</para>\r
+\r
+<screen>\r
+<userinput>\r
+<![CDATA[\r
+ xmllint --noout imported_marc_records.xml 2> marc.xml.err\r
+]]>\r
+</userinput>\r
+</screen>\r
+</step>        \r
+\r
+<step>\r
+                               <para>Clean up the marc xml file using the <systemitem>marc_cleanup</systemitem> utility:</para>\r
+<screen>\r
+<userinput>\r
+marc_cleanup --marcfile=imported_marc_records.xml --fullauto [--renumber-from #] -ot 001\r
+</userinput>   \r
+</screen>\r
+                               <para>The <option>--renumber-from</option> option is required if you have bibliographic records already in your system. Use it to set the starting id number higher \r
+                               than the last id in the biblio.record_entry table. The <systemitem>marc_cleanup</systemitem> command will generate a file called <filename>clean.marc.xml</filename>.</para>\r
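+                               <para>To pick a safe starting id, you can check the largest record id currently in use with a query along the following lines (adjust the psql connection options for your installation):</para>\r
+<screen>\r
+<userinput>\r
+psql -U evergreen -c "SELECT MAX(id) FROM biblio.record_entry;"\r
+</userinput>\r
+</screen>\r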
+</step>\r
+<step>\r
+                               <para>Create a fingerprinter file using the <systemitem>fingerprinter</systemitem> utility:</para>\r
+<screen>\r
+<userinput>\r
+fingerprinter -o incumbent.fp -x incumbent.ex clean.marc.xml\r
+</userinput>\r
+</screen>\r
+                               <para><systemitem>fingerprinter</systemitem> is used for deduplication of the incumbent records. The <option>-o</option> option specifies the \r
+                               output file and the <option>-x</option> option specifies the error output file.</para>\r
+</step>\r
+<step>\r
+                               <para>If you have bibliographic records already in your system from a previous import, create a fingerprint file for the existing records as well using the \r
+                               <systemitem>fingerprinter</systemitem> utility:</para>\r
+<screen>\r
+<userinput>\r
+fingerprinter -o production.fp -x production.fp.ex --marctype=MARC21 existing_marc_records.mrc --tag=901 --subfield=c\r
+</userinput>\r
+</screen>\r
+                               <para><systemitem>fingerprinter</systemitem> is used for deduplication of the existing records.</para>\r
+</step>\r
+\r
+<step>\r
+                               <para>Create a merged fingerprint file removing duplicate records.</para>\r
+<screen>\r
+<userinput>\r
+cat production.fp incumbent.fp | sort -r > dedupe.fp\r
+match_fingerprints [-t start id] -o records.merge dedupe.fp\r
+</userinput>\r
+</screen>\r
+</step>\r
+\r
+<step>\r
+                               <para>Create a new import XML file using the <systemitem>extract_loadset</systemitem> utility:</para>\r
+<screen>\r
+<userinput>extract_loadset -l 1 -i clean.marc.xml -o merged.xml records.merge</userinput>\r
+</screen>\r
+</step>\r
+<step>\r
+                               <para>Extract all of the currently used TCNs and generate the .bre and .ingest files to prepare for the bibliographic record load.</para>\r
+<screen>\r
+<userinput>\r
+psql -U evergreen -t -c "select tcn_value from biblio.record_entry where not deleted" | perl -npe 's/^\s+//;' > used_tcns\r
+marc2bre.pl --idfield 903 [--startid=#] --marctype=XML -f merged.xml --used_tcn_file=used_tcns > evergreen_bre_import_file.bre\r
+</userinput>\r
+</screen>\r
+                               <note>\r
+                                       <para>The <option>--startid</option> option needs to match the start id used in earlier steps and must be higher than the largest id value \r
+                                       in the biblio.record_entry table, which you can check with the SELECT MAX(id) query shown earlier. The <option>--idfield</option> option should match the marc datafield used to store your record ids.</para>\r
+                               </note>\r
+</step>\r
+\r
+<step>\r
+                               <para>Ingest the bibliographic records into the Evergreen database.</para>\r
+<screen>\r
+<userinput>\r
+<![CDATA[\r
+direct_ingest.pl < evergreen_bre_import_file.bre > evergreen_ingest_file.ingest\r
+parallel_pg_loader.pl \\r
+-or bre \\r
+-or mrd \\r
+-or mfr \\r
+-or mtfe \\r
+-or mafe \\r
+-or msfe \\r
+-or mkfe \\r
+-or msefe \\r
+-a mrd \\r
+-a mfr \\r
+-a mtfe \\r
+-a mafe \\r
+-a msfe \\r
+-a mkfe \\r
+-a msefe evergreen_ingest_file.ingest\r
+]]>\r
+</userinput>\r
+</screen>\r
+                       </step>\r
+                       <step>\r
+                               <para>Load the records using psql and the sql scripts generated from the previous step.</para>\r
+<screen>\r
+<userinput>\r
+<![CDATA[\r
+psql -U evergreen < pg_loader-output.sql > load_pg_loader-output\r
+psql -U evergreen < create_metabib.sql > log.create_metabib\r
+]]>\r
+</userinput>\r
+</screen>\r
+                       </step>\r
+                       <step>\r
+                               <para>Extract holdings from marc records for importing copies into Evergreen using the <systemitem>extract_holdings</systemitem> utility.</para>\r
+<screen>\r
+<userinput>\r
+extract_holdings --marcfile=clean.marc.xml --holding 999 --copyid 999i --map holdings.map\r
+</userinput>\r
+</screen>\r
+                               <para>This command extracts holdings based on the 999 datafield in the marc records, with the copy id taken from subfield i of the 999 datafield. You may \r
+                               need to adjust these options based on the field used for holdings information in your marc records.</para>\r
+                               <para>The <option>--map</option> option specifies a file, here <filename>holdings.map</filename>, used for mapping subfields to the holdings data you would like extracted. Here is an example based on mapping holdings data to the 999 datafield:</para>\r
+<programlisting>\r
+<![CDATA[\r
+call_num 999 a\r
+barcode 999 i\r
+location 999 l\r
+owning_lib 999 m\r
+circ_modifier 999 t\r
+]]>\r
+</programlisting>\r
+                               <para>Running the <systemitem>extract_holdings</systemitem> utility should produce a file <filename>HOLDINGS.pg</filename> similar to:</para>\r
+<programlisting>\r
+BEGIN;\r
+\r
+egid, hseq, l_call_num, l_barcode, l_location, l_owning_lib, l_circ_modifier,\r
+40      0       HD3616.K853 U54 1997    30731100751928  STACKS  FENNELL BOOK\r
+41      1       HV6548.C3 S984 1998     30731100826613  STACKS  FENNELL BOOK\r
+41      2       HV6548.C3 S984 1998     30731100804958  STACKS  BRANTFORD       BOOK\r
+...\r
+</programlisting>\r
+  \r
+                               <para>This file can be used for importing holdings into Evergreen. The <database class="field">egid</database> field is a critical column: it is used to link the volume and copy to \r
+                               the bibliographic record. Please refer to <link linkend="migratingbibrecordcopies">Adding Copies to Bibliographic Records</link> for the steps to import your holdings into Evergreen.</para> \r
+                       \r
+                       </step>\r
+               </procedure>\r
+               \r
+       </section>\r
        <section xml:id="migratingbibrecordcopies">\r
                <title>Adding Copies to Bibliographic Records</title>\r
-               <indexterm><primary>migrating</primary><secondary>adding copies</secondary></indexterm> \r
-               <para>Once you've loaded the bibliographic records in Evergreen, you can search and view the records in the staff client, but they will not be visible in the catalogue. By \r
-               default, bibliographic records will not be visible in the catalogue until you add a copy representing a physical manifestation of that resource. You can add a copy manually through \r
-               the staff client via the Holdings maintenance screen, but if you're bulk-importing MARC records you probably want to bulk load the associated copies, call numbers, and barcodes as \r
-               well.</para>\r
-               <simplesect>\r
-                       <title>Importing volumes and copies from <systemitem>MARC21XML</systemitem> holdings</title>\r
-                       <indexterm><primary>migrating</primary><secondary>importing volumes</secondary></indexterm>     \r
-                       <para>There is currently no simple method for importing holdings based on the contents of the MARC holdings field (852, as specified by \r
-                       <link xml:href="http://www.loc.gov/marc/holdings/">http://www.loc.gov/marc/holdings/</link>). \r
-                       However, a more or less automated method could be built that performs the following steps:</para>\r
-                       <procedure>\r
-                               <step><para>Create a tab-delimited file that contains your holdings information</para>\r
-                               <itemizedlist>\r
-                                       <listitem>Required fields: bibliographic ID, barcode, and call number</listitem>                \r
-                                       <listitem>Optional fields: shelving location (text) – see  the <link linkend="asset.table.copy">asset.copy table</link> for \r
-                                       possible fields to include</listitem>\r
-                               </itemizedlist>         \r
-                               </step>\r
-                               <step><para>Create a staging table that matches the contents of your tab-delimited file.</para>\r
-                               </step>\r
-                               <step><para>Insert the contents of your tab-delimited file into the table.</para>\r
-                               </step>\r
-                               <step><para>Generate <systemitem>SQL</systemitem> scripts for item import to match the staging table that you created.</para>\r
-                               </step>\r
-                               <step><para>Run the <systemitem>SQL</systemitem> scripts to create the holdings in Evergreen.</para>\r
-                               </step>\r
-                       </procedure>\r
-                       <para>If an ILS has the concept of <quote>item categories</quote>, these may be mapped to Evergreen via statistical categories in the \r
-                       <link linkend="asset.table.stat-cat">asset.stat_cat table</link> . Note that statistical categories cannot be used as search filters; individual branches can define \r
-                       their own statistical categories; and define their own statistical category entries for individual items - best use case for statistical categories is probably for gifts.</para>\r
-                       <para>In 2009, Conifer placed their <link xl:href="http://svn.open-ils.org/trac/ILS-Contrib/browser/conifer/branches/rel_1_6_1/tools/migration-scripts"  \r
-                       xl:title="Conifer migration tools">migration tools</link> \r
-                       in the <link xl:href="http://svn.open-ils.org/trac/ILS-Contrib/browser/conifer" xl:title="Conifer ILS-Contrib SVN repository">Conifer ILS-Contrib SVN repository</link>, which might be useful samples augmenting the \r
-                       basic staging table import approach.</para>\r
-                       <para>In 2010, Equinox contributed a set of <link xl:href="http://git.esilibrary.com/?p=migration-tools.git;a=summary"  xl:title="Equinox migration utilities">migration utilities</link></para>\r
-               </simplesect>\r
+               <para>Before bibliographic records can be found in an OPAC search, copies will need to be created. It is very important to understand how the various tables relate to each other \r
+               with regard to holdings maintenance.</para> \r
+               <para>The following procedure will guide you through the process of populating Evergreen with volumes and copies. This is a very simple example; the SQL queries may need to be adjusted \r
+               for the specific data in your holdings.</para>  \r
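+               <para>As a rough sketch of those relationships: each row in the asset.copy table points to a row in the asset.call_number table, which in turn points to a row in the \r
+               biblio.record_entry table. Once your data is loaded, a query along these lines (illustrative only) walks that chain:</para>\r
+<programlisting language="sql">\r
+SELECT bre.id AS bib_id, acn.label AS call_number, acp.barcode\r
+FROM biblio.record_entry bre\r
+JOIN asset.call_number acn ON (acn.record = bre.id)\r
+JOIN asset.copy acp ON (acp.call_number = acn.id);\r
+</programlisting>\r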
+               <procedure>\r
+                       <step>\r
+                               <para>Create a staging_items staging table to hold the holdings data:</para>\r
+<programlisting language="sql">\r
+CREATE TABLE staging_items (\r
+        callnum text, -- call number label\r
+        hseq    int,  -- holding sequence number from extract_holdings\r
+        bibkey  int,  -- biblio.record_entry.id\r
+        createdate      date,\r
+        location        text,\r
+        barcode         text,\r
+        item_type       text,\r
+        owning_lib      text  -- actor.org_unit.shortname\r
+);\r
+</programlisting>\r
+                       </step> \r
+                       <step>\r
+                               <para>Log in to Evergreen using psql and run the following COPY command to load the items generated by the <systemitem>extract_holdings</systemitem> utility:</para>\r
+<programlisting language="sql">\r
+COPY staging_items (bibkey, hseq, callnum, barcode, location, owning_lib, item_type) FROM 'HOLDINGS.pg'; \r
+</programlisting>\r
+                               <para>The file <filename>HOLDINGS.pg</filename> and/or the COPY query may need to be adjusted for your particular circumstances.</para>  \r
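+                               <para>Note that a server-side COPY reads <filename>HOLDINGS.pg</filename> from the database server's filesystem. If the file is on the \r
+                               workstation where you run psql instead, the client-side <command>\copy</command> meta-command of psql is one possible alternative:</para>\r
+<programlisting language="sql">\r
+\copy staging_items (bibkey, hseq, callnum, barcode, location, owning_lib, item_type) FROM 'HOLDINGS.pg'\r
+</programlisting>\r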
+                       </step>\r
+                       \r
+\r
+                       <step>\r
+                               <para>Generate shelving locations from your staging table.</para>\r
+<programlisting language="sql">\r
+INSERT INTO asset.copy_location (name, owning_lib)\r
+SELECT  DISTINCT l.location, ou.id\r
+FROM  staging_items l \r
+       JOIN actor.org_unit ou  ON (l.owning_lib = ou.shortname); \r
+</programlisting>\r
+                       </step>\r
+                       <step>\r
+                               <para>Generate circulation modifiers from your staging table.</para>\r
+<programlisting language="sql">\r
+INSERT INTO config.circ_modifier (code, name, description, sip2_media_type, magnetic_media)\r
+        SELECT  DISTINCT item_type AS code,\r
+          item_type AS name,\r
+          LOWER(item_type) AS description,\r
+          '001' AS sip2_media_type,\r
+          FALSE AS magnetic_media\r
+          FROM  staging_items\r
+          WHERE item_type NOT IN (SELECT code FROM config.circ_modifier);\r
+</programlisting>\r
+                       </step>\r
+                       <step>\r
+                               <para>Generate call numbers from your staging table:</para>\r
+<programlisting language="sql">\r
+INSERT INTO asset.call_number (creator,editor,record,label,owning_lib)\r
+SELECT  DISTINCT 1, 1, b.id, l.callnum, ou.id\r
+FROM  staging_items l\r
+JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname);\r
+</programlisting>\r
+                       </step>\r
+                       <step>\r
+                               <para>Generate copies from your staging table:</para>\r
+<programlisting language="sql">\r
+INSERT INTO asset.copy (\r
+circ_lib, creator, editor, create_date, barcode,\r
+STATUS, location, loan_duration, fine_level, circ_modifier, deposit, ref, call_number)         \r
+\r
+SELECT  DISTINCT ou.id AS circ_lib,\r
+       1 AS creator,\r
+       1 AS editor,\r
+       l.createdate AS create_date,\r
+       l.barcode AS barcode,\r
+       0 AS STATUS,\r
+       cl.id AS location,\r
+       2 AS loan_duration,\r
+       2 AS fine_level,\r
+       l.item_type AS circ_modifier,\r
+       FALSE AS deposit,\r
+       CASE\r
+       WHEN l.item_type = 'REFERENCE' THEN TRUE\r
+       ELSE FALSE\r
+       END AS ref,                                             \r
+       cn.id AS call_number\r
+       FROM  staging_items l\r
+       JOIN actor.org_unit ou\r
+               ON (l.owning_lib = ou.shortname)\r
+       JOIN asset.copy_location cl\r
+               ON (ou.id = cl.owning_lib AND l.location = cl.name)\r
+       JOIN asset.call_number cn\r
+               ON (ou.id = cn.owning_lib      \r
+               AND l.callnum = cn.label);\r
+</programlisting>\r
+                               <para>You should now have copies in your Evergreen database and should be able to search and find the bibliographic records with attached copies.</para> \r
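+                               <para>As a quick sanity check (illustrative only), you can count the newly created copies per owning library:</para>\r
+<programlisting language="sql">\r
+SELECT ou.shortname, COUNT(*)\r
+FROM asset.copy acp\r
+JOIN actor.org_unit ou ON (acp.circ_lib = ou.id)\r
+GROUP BY ou.shortname;\r
+</programlisting>\r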
+                       </step>\r
+               </procedure>\r
        </section>\r
        <section xml:id="migratingpatrons">\r
                <title>Migrating Patron Data</title>\r