From 0cc6a3f035c145ffe98a082b63e08ac98f654266 Mon Sep 17 00:00:00 2001
From: Chris Sharp
Date: Thu, 13 Sep 2012 07:32:09 -0400
Subject: [PATCH] add Dan Scott's is_valid_marcxml() function to ferret out trouble records

---
 .../version-upgrade/pines-pre-2.1-2.2-upgrade.sql | 46 ++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/Open-ILS/src/sql/Pg/version-upgrade/pines-pre-2.1-2.2-upgrade.sql b/Open-ILS/src/sql/Pg/version-upgrade/pines-pre-2.1-2.2-upgrade.sql
index ab8d516426..06c6180257 100644
--- a/Open-ILS/src/sql/Pg/version-upgrade/pines-pre-2.1-2.2-upgrade.sql
+++ b/Open-ILS/src/sql/Pg/version-upgrade/pines-pre-2.1-2.2-upgrade.sql
@@ -3,6 +3,48 @@
 -- remove replication schema
 DROP SCHEMA _replication CASCADE;
 
+-- For preventing invalid marcxml - possible supplement or replacement for biblio.check_marcxml_well_formed?
+-- Should be useful for flagging bad MARCXML that has slipped in over time
+--
+-- is_valid_marcxml(bibid) fetches biblio.record_entry.marc for the given record
+-- id and attempts to parse it with MARC::Record. It returns TRUE when parsing
+-- succeeds, and FALSE when bibid is NULL, no row matches, or parsing throws
+-- (the parser's error message is emitted as a WARNING).
+
+CREATE OR REPLACE FUNCTION is_valid_marcxml(bibid BIGINT) RETURNS BOOL AS $func$
+use strict;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use Encode;
+use Unicode::Normalize;
+
+MARC::Charset->assume_unicode(1);
+
+# A NULL bibid arrives as undef and cannot match a record; return early
+# instead of concatenating it into the query text below.
+return 0 unless defined $_[0];
+
+# bibid is declared BIGINT, so appending it to the query text is safe.
+my $marc = spi_exec_query('SELECT marc FROM biblio.record_entry WHERE id = ' . $_[0]);
+# {processed} is the row count; zero means no record has this id.
+if (!$marc->{processed}) {
+    return 0;
+}
+
+# Any exception thrown while parsing marks the record as invalid.
+eval {
+    my $record = MARC::Record->new_from_xml($marc->{rows}[0]->{marc});
+};
+if ($@) {
+    elog(WARNING, $@);
+    return 0;
+}
+
+return 1;
+
+$func$ LANGUAGE PLPERLU;
+
 -- disable triggers on biblio.record_entry to do an update
 ALTER TABLE biblio.record_entry DISABLE TRIGGER ALL;
 
@@ -14,3 +56,7 @@ ALTER TABLE biblio.record_entry ENABLE TRIGGER ALL;
 -- disable triggers on asset.copy
 ALTER TABLE asset.copy DISABLE TRIGGER ALL;
 
+-- go ahead and find problem records if any exist because we don't want them to interfere with progress
+-- is_valid_marcxml() takes the record id (BIGINT), not the marc text
+SELECT id FROM biblio.record_entry WHERE is_valid_marcxml(id) IS FALSE;
+
-- 
2.11.0