Another preprocessing script takes shape...
author dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Fri, 17 Apr 2009 03:39:15 +0000 (03:39 +0000)
committer dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Fri, 17 Apr 2009 03:39:15 +0000 (03:39 +0000)
git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/trunk@342 6d9bc8c9-1ec2-4278-b937-99fde70a366f

tools/migration-scripts/fix_bad_marcxml.pl [new file with mode: 0644]

diff --git a/tools/migration-scripts/fix_bad_marcxml.pl b/tools/migration-scripts/fix_bad_marcxml.pl
new file mode 100644 (file)
index 0000000..9b202c6
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+# Driver: clean every MARCXML file named on the command line, one at a
+# time. The cleaned copy of each FILE is written next to it as FILE.new.
+for my $marcxml_file (@ARGV) {
+       clean_empty_datafields($marcxml_file);
+}
+
+# clean_empty_datafields($file)
+#
+# Copies $file to "$file.new", dropping every empty datafield element.
+# Empty datafields anger MARC::File::XML, so they have to be removed
+# before the records are parsed.
+#
+# Detection is purely line-based: a line containing "<datafield" that is
+# immediately followed by a line containing "</datafield>" is treated as
+# an empty element, and both lines are omitted from the output. All other
+# lines are copied through unchanged and in order.
+#
+# Parameters:
+#   $file - path of the MARCXML file to read; it is not modified.
+#
+# Returns 1 on success. Dies with a message naming the file if either
+# file cannot be opened or closed.
+sub clean_empty_datafields {
+       my $file = shift;
+       my $clean_file = "$file.new";
+
+       open(my $in, '<', $file)
+               or die "Cannot open '$file' for reading: $!";
+       open(my $out, '>', $clean_file)
+               or die "Cannot open '$clean_file' for writing: $!";
+
+       # The previous input line is held back, unwritten, until the next
+       # line shows whether the two together form an empty datafield.
+       my $held;
+       while (my $line = <$in>) {
+               if (defined $held
+                       and $held =~ m#<datafield#
+                       and $line =~ m#</datafield>#) {
+                       # Opening tag directly followed by its closing tag:
+                       # discard both lines.
+                       $held = undef;
+                       next;
+               }
+
+               print {$out} $held if defined $held;
+               $held = $line;
+       }
+
+       # Flush the last held line. Nothing is held when the input was empty
+       # or ended with an empty datafield, so no stray closing tag is
+       # written in that case.
+       print {$out} $held if defined $held;
+
+       close($in) or die "Cannot close '$file': $!";
+       # Buffered write errors only surface when the handle is closed.
+       close($out) or die "Cannot close '$clean_file': $!";
+
+       return 1;
+}