From ecdb89d776ac55ec149880c10d1e157632afe153 Mon Sep 17 00:00:00 2001 From: miker Date: Thu, 15 Oct 2009 00:47:55 +0000 Subject: [PATCH] Yet more progress towards in-db ingest. This time, a MARC flattener for metabib.full_rec data. git-svn-id: svn://svn.open-ils.org/ILS/trunk@14430 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 67 +++++++++++++++++++- .../sql/Pg/upgrade/0036.schema.flatten_marc.sql | 71 ++++++++++++++++++++++ 3 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0036.schema.flatten_marc.sql diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index ede19f5606..dce7d5fa97 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -51,7 +51,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0035'); -- miker +INSERT INTO config.upgrade_log (version) VALUES ('0036'); -- miker CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index cab6a85eee..d1632c4d6b 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -347,5 +347,70 @@ CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( BIGINT ) RETURNS SELECT * FROM biblio.extract_metabib_field_entry($1, ' '); $func$ LANGUAGE SQL; -COMMIT; +CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + output metabib.full_rec%ROWTYPE; + field RECORD; +BEGIN + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + FOR field IN SELECT * FROM biblio.flatten_marc( bib.marc ) LOOP + output.record := rid; + output.ind1 := field.ind1; + output.ind2 := field.ind2; + output.tag := field.tag; + output.subfield := field.subfield; + IF field.subfield IS NOT NULL THEN + output.value := naco_normalize(field.value, field.subfield); + ELSE + output.value := field.value; + END IF; + RETURN NEXT output; + END LOOP; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION biblio.flatten_marc ( TEXT ) RETURNS SETOF metabib.full_rec AS $func$ + +use MARC::Record; +use MARC::File::XML; + +my $xml = shift; +my $r = MARC::Record->new_from_xml( $xml ); + +return_next( { tag => 'LDR', value => $r->leader } ); + +for my $f ( $r->fields ) { + if ($f->is_control_field) { + return_next({ tag => $f->tag, value => $f->data }); + } else { + for my $s ($f->subfields) { + return_next({ + tag => $f->tag, + ind1 => $f->indicator(1), + ind2 => $f->indicator(2), + subfield => $s->[0], + value => $s->[1] + }); + + if ( $f->tag eq '245' and $s->[0] eq 'a' ) { + my $trim = $f->indicator(2) || 0; + return_next({ + tag => 'tnf', + ind1 => $f->indicator(1), + ind2 => $f->indicator(2), + subfield => 'a', + value => substr( $s->[1], $trim ) + }); + } + } + } +} + +return undef; + +$func$ LANGUAGE PLPERLU; + +COMMIT; diff --git a/Open-ILS/src/sql/Pg/upgrade/0036.schema.flatten_marc.sql b/Open-ILS/src/sql/Pg/upgrade/0036.schema.flatten_marc.sql new file mode 100644 index 0000000000..23e20157d4 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0036.schema.flatten_marc.sql @@ -0,0 +1,71 @@ +BEGIN; + +INSERT INTO config.upgrade_log (version) VALUES ('0036'); -- miker + +CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + output metabib.full_rec%ROWTYPE; + field RECORD; +BEGIN + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + FOR field IN SELECT * FROM biblio.flatten_marc( bib.marc ) LOOP + output.record := rid; + output.ind1 := field.ind1; + output.ind2 := field.ind2; + output.tag := field.tag; + output.subfield := field.subfield; + IF field.subfield IS NOT NULL THEN + output.value := naco_normalize(field.value, field.subfield); + ELSE + output.value := field.value; + END IF; + + RETURN NEXT output; + END LOOP; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION biblio.flatten_marc ( TEXT ) RETURNS SETOF metabib.full_rec AS $func$ + +use MARC::Record; +use MARC::File::XML; + +my $xml = shift; +my $r = MARC::Record->new_from_xml( $xml ); + +return_next( { tag => 'LDR', value => $r->leader } ); + +for my $f ( $r->fields ) { + if ($f->is_control_field) { + return_next({ tag => $f->tag, value => $f->data }); + } else { + for my $s ($f->subfields) { + return_next({ + tag => $f->tag, + ind1 => $f->indicator(1), + ind2 => $f->indicator(2), + subfield => $s->[0], + value => $s->[1] + }); + + if ( $f->tag eq '245' and $s->[0] eq 'a' ) { + my $trim = $f->indicator(2) || 0; + return_next({ + tag => 'tnf', + ind1 => $f->indicator(1), + ind2 => $f->indicator(2), + subfield => 'a', + value => substr( $s->[1], $trim ) + }); + } + } + } +} + +return undef; + +$func$ LANGUAGE PLPERLU; + +COMMIT; -- 2.11.0