From: miker Date: Tue, 16 Mar 2010 14:59:39 +0000 (+0000) Subject: Improved patch from Galen Charlton: removes empty XML elements when ingesting a bib... X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=22f17400fc75fcc9fdb59dca1f88efb22d35559f;p=evergreen%2Fbjwebb.git Improved patch from Galen Charlton: removes empty XML elements when ingesting a bib record git-svn-id: svn://svn.open-ils.org/ILS/trunk@15862 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm b/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm index 1b1fbae4d..3dd3d3211 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Cat/BibCommon.pm @@ -111,14 +111,34 @@ sub __make_marc_doc { my $marcxml = XML::LibXML->new->parse_string($xml); $marcxml->documentElement->setNamespace($MARC_NAMESPACE, "marc", 1 ); $marcxml->documentElement->setNamespace($MARC_NAMESPACE); - # remove empty control fields - at least one source of records adds ersatz blank 008s - # that become empty controlfield elements - foreach my $controlfield ($marcxml->documentElement->getElementsByTagNameNS($MARC_NAMESPACE, 'controlfield')) { - $controlfield->parentNode->removeChild($controlfield) unless $controlfield->hasChildNodes(); - } + __remove_empty_marc_nodes($marcxml); return $marcxml; } +# remove empty control fields, subfields, and variable data fields, which +# can creep in via less-than-correct imported MARC records or issues +# with templates +sub __remove_empty_marc_nodes { + my $marcxml = shift; + + __remove_if_childless($_) foreach $marcxml->documentElement->getElementsByTagNameNS($MARC_NAMESPACE, 'controlfield'); + __remove_if_childless($_) foreach $marcxml->documentElement->getElementsByTagNameNS($MARC_NAMESPACE, 'subfield'); + __remove_if_childless($_) foreach $marcxml->documentElement->getElementsByTagNameNS($MARC_NAMESPACE, 'datafield'); +} + +sub __remove_if_childless { + my $node = shift; + my @children = $node->childNodes(); + my $has_nonblank_children = 0; + # can do this more concisely by requiring XML::LibXML >= 1.70 and using nonBlankChildNodes() + foreach my $node ($node->childNodes()) { + if ($node->nodeType != XML::LibXML::XML_TEXT_NODE || $node->nodeValue !~ /^\s*$/) { + $has_nonblank_children = 1; + last; + } + } + $node->parentNode->removeChild($node) unless $has_nonblank_children; +} sub _find_tcn_info { my $editor = shift;