Add indexing normalization for Polish l (ł)
author dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Mon, 13 Sep 2010 14:30:10 +0000 (14:30 +0000)
committer dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Mon, 13 Sep 2010 14:30:10 +0000 (14:30 +0000)
git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/branches/rel_1_6_1@987 6d9bc8c9-1ec2-4278-b937-99fde70a366f

src/perlmods/OpenILS/Application/Ingest.pm
src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm

index 30892b5..e7e2bf0 100644 (file)
@@ -900,6 +900,9 @@ sub class_index_string_xml {
             $value =~ s/\pC+//sgo;
             $value =~ s/\W+$//sgo;
 
+            $term =~ s/\x{142}/l/sgo; # Convert Polish l (lowercase)
+            $term =~ s/\x{141}/l/sgo; # Convert Polish l (uppercase)
+
             # hack to normalize ratio-like strings
             while ($term =~ /\b\d{1}:[, ]?\d+(?:[ ,]\d+[^:])+/o) {
                 $term = $` . join ('', split(/[, ]/, $&)) . $';
index 055266a..42550d5 100644 (file)
@@ -24,6 +24,9 @@
                $term =~ s/(\pM+)//gos;
                $term =~ s/(\b\.\b)//gos;
 
+               $term =~ s/\x{142}/l/sgo; # Convert Polish l (lowercase)
+               $term =~ s/\x{141}/l/sgo; # Convert Polish l (uppercase)
+
                # hack to normalize ratio-like strings
                while ($term =~ /\b\d{1}:[, ]?\d+(?:[ ,]\d+[^:])+/o) {
                        $term = $` . join ('', split(/[, ]/, $&)) . $';