Improve the ID-matching regex
author dbs <dbs@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Sun, 26 Apr 2009 01:25:48 +0000 (01:25 +0000)
committer dbs <dbs@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Sun, 26 Apr 2009 01:25:48 +0000 (01:25 +0000)
Normalize to composed Unicode characters

git-svn-id: svn://svn.open-ils.org/ILS/trunk@12986 dcc99617-32d9-48b4-a31d-7c20da2025e4

Open-ILS/src/extras/import/marc2sre.pl

index 83de43a..3687257 100755 (executable)
@@ -70,14 +70,14 @@ while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
        # but we can work out call numbers later in SQL by the record ID + call number text
        if ($record_field) {
                $record = $record_field->data;
-               $record =~ s/(\d+)/$1/;
+               $record =~ s/^.*?(\d+).*?$/$1/o;
        }
 
        (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
        $xml =~ s/^<\?xml.+\?\s*>//go;
        $xml =~ s/>\s+</></go;
        $xml =~ s/\p{Cc}//go;
-       $xml = OpenILS::Application::AppUtils->entityize($xml,'D');
+       $xml = OpenILS::Application::AppUtils->entityize($xml);
        $xml =~ s/[\x00-\x1f]//go;
 
        my $bib = new Fieldmapper::serial::record_entry;