Yet more progress towards in-db ingest. This time, a MARC flattener for metabib...
author miker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Thu, 15 Oct 2009 00:47:55 +0000 (00:47 +0000)
committer miker <miker@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Thu, 15 Oct 2009 00:47:55 +0000 (00:47 +0000)
git-svn-id: svn://svn.open-ils.org/ILS/trunk@14430 dcc99617-32d9-48b4-a31d-7c20da2025e4

Open-ILS/src/sql/Pg/002.schema.config.sql
Open-ILS/src/sql/Pg/030.schema.metabib.sql
Open-ILS/src/sql/Pg/upgrade/0036.schema.flatten_marc.sql [new file with mode: 0644]

index ede19f5..dce7d5f 100644 (file)
@@ -51,7 +51,7 @@ CREATE TABLE config.upgrade_log (
     install_date    TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
 );
 
-INSERT INTO config.upgrade_log (version) VALUES ('0035'); -- miker
+INSERT INTO config.upgrade_log (version) VALUES ('0036'); -- miker
 
 CREATE TABLE config.bib_source (
        id              SERIAL  PRIMARY KEY,
index cab6a85..d1632c4 100644 (file)
@@ -347,5 +347,70 @@ CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS
        SELECT * FROM biblio.extract_metabib_field_entry($1, ' ');
 $func$ LANGUAGE SQL;
 
-COMMIT;
+-- biblio.flatten_marc( rid BIGINT )
+--
+-- Looks up the biblio.record_entry row whose id is rid, flattens its MARC
+-- with biblio.flatten_marc( TEXT ), and returns one metabib.full_rec row per
+-- flattened entry with "record" set to rid.  Values that come with a subfield
+-- code are passed through naco_normalize(); entries without a subfield code
+-- are returned unchanged.
+--
+-- Returns an empty set when no such record exists or the record has no MARC,
+-- instead of handing a NULL document to the XML parser in the TEXT variant.
+CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
+DECLARE
+       bib     biblio.record_entry%ROWTYPE;
+       output  metabib.full_rec%ROWTYPE;
+       field   RECORD;
+BEGIN
+       SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+       -- Nothing to flatten: an unknown id (or a row without MARC) yields no rows.
+       IF NOT FOUND OR bib.marc IS NULL THEN
+               RETURN;
+       END IF;
+
+       FOR field IN SELECT * FROM biblio.flatten_marc( bib.marc ) LOOP
+               output.record := rid;
+               output.ind1 := field.ind1;
+               output.ind2 := field.ind2;
+               output.tag := field.tag;
+               output.subfield := field.subfield;
+
+               -- Only subfield data is normalized; the subfield code is handed
+               -- to naco_normalize() along with the value.
+               IF field.subfield IS NOT NULL THEN
+                       output.value := naco_normalize(field.value, field.subfield);
+               ELSE
+                       output.value := field.value;
+               END IF;
 
+               RETURN NEXT output;
+       END LOOP;
+
+       RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+-- biblio.flatten_marc( TEXT )
+--
+-- Parses a MARCXML document and returns it flattened into metabib.full_rec
+-- shaped rows (the "record" column is left unset):
+--   * one 'LDR' row holding the leader,
+--   * one row per control field (tag + data),
+--   * one row per subfield of every data field (tag, ind1, ind2, subfield, value),
+--   * one extra 'tnf' row for each 245$a, holding the title with its leading
+--     nonfiling characters removed.
+--
+-- A NULL argument yields an empty set.  Malformed XML still raises the
+-- parser's error.
+CREATE OR REPLACE FUNCTION biblio.flatten_marc ( TEXT ) RETURNS SETOF metabib.full_rec AS $func$
+
+use strict;
+use MARC::Record;
+use MARC::File::XML;
+
+my $xml = shift;
+
+# SQL NULL arrives as undef; there is nothing to parse, so return no rows.
+return undef unless defined $xml;
+
+my $r = MARC::Record->new_from_xml( $xml );
+
+return_next( { tag => 'LDR', value => $r->leader } );
+
+for my $f ( $r->fields ) {
+       if ($f->is_control_field) {
+               return_next({ tag => $f->tag, value => $f->data });
+       } else {
+               for my $s ($f->subfields) {
+                       return_next({
+                               tag      => $f->tag,
+                               ind1     => $f->indicator(1),
+                               ind2     => $f->indicator(2),
+                               subfield => $s->[0],
+                               value    => $s->[1]
+                       });
+
+                       if ( $f->tag eq '245' and $s->[0] eq 'a' ) {
+                               my $title = defined $s->[1] ? $s->[1] : '';
+
+                               # 245 ind2 is the number of leading nonfiling characters.
+                               # Use it only when it is a single digit that fits inside the
+                               # title; a blank, non-numeric or oversized indicator means
+                               # "trim nothing" rather than producing a NULL value.
+                               my $trim = $f->indicator(2);
+                               $trim = 0
+                                       unless defined $trim
+                                               and $trim =~ /\A[0-9]\z/
+                                               and $trim <= length $title;
+
+                               return_next({
+                                       tag      => 'tnf',
+                                       ind1     => $f->indicator(1),
+                                       ind2     => $f->indicator(2),
+                                       subfield => 'a',
+                                       value    => substr( $title, $trim )
+                               });
+                       }
+               }
+       }
+}
+
+# All rows were already emitted through return_next().
+return undef;
+
+$func$ LANGUAGE PLPERLU;
+
+COMMIT;
diff --git a/Open-ILS/src/sql/Pg/upgrade/0036.schema.flatten_marc.sql b/Open-ILS/src/sql/Pg/upgrade/0036.schema.flatten_marc.sql
new file mode 100644 (file)
index 0000000..23e2015
--- /dev/null
@@ -0,0 +1,71 @@
+-- Upgrade script 0036: creates or replaces the biblio.flatten_marc() functions below.
+-- Everything runs inside one transaction, closed by the COMMIT at the end of the file.
+BEGIN;
+
+-- Log this upgrade's version number in config.upgrade_log.
+INSERT INTO config.upgrade_log (version) VALUES ('0036'); -- miker
+
+-- biblio.flatten_marc( rid BIGINT )
+--
+-- Looks up the biblio.record_entry row whose id is rid, flattens its MARC
+-- with biblio.flatten_marc( TEXT ), and returns one metabib.full_rec row per
+-- flattened entry with "record" set to rid.  Values that come with a subfield
+-- code are passed through naco_normalize(); entries without a subfield code
+-- are returned unchanged.
+--
+-- Returns an empty set when no such record exists or the record has no MARC,
+-- instead of handing a NULL document to the XML parser in the TEXT variant.
+CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$
+DECLARE
+       bib     biblio.record_entry%ROWTYPE;
+       output  metabib.full_rec%ROWTYPE;
+       field   RECORD;
+BEGIN
+       SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;
+
+       -- Nothing to flatten: an unknown id (or a row without MARC) yields no rows.
+       IF NOT FOUND OR bib.marc IS NULL THEN
+               RETURN;
+       END IF;
+
+       FOR field IN SELECT * FROM biblio.flatten_marc( bib.marc ) LOOP
+               output.record := rid;
+               output.ind1 := field.ind1;
+               output.ind2 := field.ind2;
+               output.tag := field.tag;
+               output.subfield := field.subfield;
+
+               -- Only subfield data is normalized; the subfield code is handed
+               -- to naco_normalize() along with the value.
+               IF field.subfield IS NOT NULL THEN
+                       output.value := naco_normalize(field.value, field.subfield);
+               ELSE
+                       output.value := field.value;
+               END IF;
+
+               RETURN NEXT output;
+       END LOOP;
+
+       RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+-- biblio.flatten_marc( TEXT )
+--
+-- Parses a MARCXML document and returns it flattened into metabib.full_rec
+-- shaped rows (the "record" column is left unset):
+--   * one 'LDR' row holding the leader,
+--   * one row per control field (tag + data),
+--   * one row per subfield of every data field (tag, ind1, ind2, subfield, value),
+--   * one extra 'tnf' row for each 245$a, holding the title with its leading
+--     nonfiling characters removed.
+--
+-- A NULL argument yields an empty set.  Malformed XML still raises the
+-- parser's error.
+CREATE OR REPLACE FUNCTION biblio.flatten_marc ( TEXT ) RETURNS SETOF metabib.full_rec AS $func$
+
+use strict;
+use MARC::Record;
+use MARC::File::XML;
+
+my $xml = shift;
+
+# SQL NULL arrives as undef; there is nothing to parse, so return no rows.
+return undef unless defined $xml;
+
+my $r = MARC::Record->new_from_xml( $xml );
+
+return_next( { tag => 'LDR', value => $r->leader } );
+
+for my $f ( $r->fields ) {
+       if ($f->is_control_field) {
+               return_next({ tag => $f->tag, value => $f->data });
+       } else {
+               for my $s ($f->subfields) {
+                       return_next({
+                               tag      => $f->tag,
+                               ind1     => $f->indicator(1),
+                               ind2     => $f->indicator(2),
+                               subfield => $s->[0],
+                               value    => $s->[1]
+                       });
+
+                       if ( $f->tag eq '245' and $s->[0] eq 'a' ) {
+                               my $title = defined $s->[1] ? $s->[1] : '';
+
+                               # 245 ind2 is the number of leading nonfiling characters.
+                               # Use it only when it is a single digit that fits inside the
+                               # title; a blank, non-numeric or oversized indicator means
+                               # "trim nothing" rather than producing a NULL value.
+                               my $trim = $f->indicator(2);
+                               $trim = 0
+                                       unless defined $trim
+                                               and $trim =~ /\A[0-9]\z/
+                                               and $trim <= length $title;
+
+                               return_next({
+                                       tag      => 'tnf',
+                                       ind1     => $f->indicator(1),
+                                       ind2     => $f->indicator(2),
+                                       subfield => 'a',
+                                       value    => substr( $title, $trim )
+                               });
+                       }
+               }
+       }
+}
+
+# All rows were already emitted through return_next().
+return undef;
+
+$func$ LANGUAGE PLPERLU;
+
+COMMIT;