Revert "A tested version of the PostgreSQL extension."

author Swenyu Duan <dsy@sina.com>
Sat, 21 Jul 2012 02:31:45 +0000 (22:31 -0400)
committer Swenyu Duan <dsy@sina.com>
Sat, 21 Jul 2012 02:31:45 +0000 (22:31 -0400)

diff --git a/Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql b/Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql

index 023e109..5f3049d 100755 (executable)
--- a/Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql
+++ b/Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql
@@ -1,7 +1,3 @@
-CREATE OR REPLACE FUNCTION oils.xslt.process( TEXT, TEXT) RETURNS TEXT
-       AS 'c_functions.so', 'oils.xslt.process'
-       LANGUAGE C STRICT IMMUTABLE;
-
  CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT 
         AS 'c_functions.so', 'search_normalize'
         LANGUAGE C STRICT IMMUTABLE;    
diff --git a/Open-ILS/src/sql/Pg/extensions/makefile b/Open-ILS/src/sql/Pg/extensions/makefile

index 7d2cd8e..8aa4ad3 100644 (file)
--- a/Open-ILS/src/sql/Pg/extensions/makefile
+++ b/Open-ILS/src/sql/Pg/extensions/makefile
@@ -1,9 +1,9 @@
  MODULE_big = c_functions
  EXTENSION = c_functions
-SHLIB_LINK = -licutu -licuuc -licuio -licui18n -licule -liculx -licudata -lxml2 -lxslt
-PG_CPPFLAGS = -fshort-wchar 
+SHLIB_LINK = -licutu -licuuc -licuio -licui18n -licule -liculx -licudata
+#PG_CPPFLAGS = -L/usr/lib -licuuc -licuio -licui18n -licule -liculx -licudata
  DATA = c_functions--1.0.sql
-OBJS = normalize.functions_in_c.o xml.functions_in_c.o
+OBJS = normalize.functions_in_c.o
  
  PG_CONFIG = pg_config
  PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c b/Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c

index a2128bb..26e48e0 100755 (executable)
--- a/Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c
+++ b/Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c
@@ -1,718 +1,608 @@
-/************************************************************************/
-/*  C Implementation: public.search_normalize public.naco_normalize
- *
- *  Descritption:
- *    This file implement public.search_normalize and public.naco_normalize.
- *  These two functions is included in the PostgreSQL extension c_functions.
- *  ICU4C and postgres lib is needed to build this file.
- *
- *  Author: Swenyu Duan <dsy88@sina.com>, (C) 2012
- *
- *  Copyright: See COPYING file that comes with this distribution.
- */
-/************************************************************************/
-#include "postgres.h"
-#include "fmgr.h"
-#include "unicode/unorm2.h"
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-#include "unicode/uregex.h"
-#include "unicode/umachine.h"
-
-#ifdef PG_MODULE_MAGIC
-PG_MODULE_MAGIC;
-#endif
-
-static int32_t regexp_expand_string(const UChar *src,
-                                    int32_t src_len,
-                                    UChar *des,
-                                    int32_t des_capacity)
-{
-    int des_len;
-    UChar s, e;
-    const UChar *cur_pos;
-    const UChar *pre_pos;
-
-    if (src == NULL)
-    {
-        return 0;
-    }
-    pre_pos = src;
-    cur_pos = u_strchr(src, L'-');
-       des_len = 0;
-
-    while (cur_pos != NULL)
-    {
-        if (cur_pos + 1 > src + src_len)
-        {
-            //Error!
-            //The pattern is end with a '-'.
-            return -1;
-        }
-
-        while (pre_pos < cur_pos - 1)
-        {
-            if (des != NULL)
-            {
-                des[des_len] = *pre_pos;
-            }
-            des_len++;
-                               pre_pos++;
-        }
-
-        cur_pos ++;
-
-        for(s = *pre_pos; s < *cur_pos; s++)
-        {
-            if (des != NULL)
-            {
-                des[des_len] = s;
-            }
-            des_len++;
-        }
-
-        pre_pos = cur_pos;
-
-        cur_pos = u_strchr(cur_pos, L'-');
-    }
-
-    return des_len;
-}
-
-static int32_t regexp_transliterate(const UChar *search_list,
-                                                                       int32_t search_list_len,
-                                                                       const UChar *replacement_list,
-                                                                       int32_t replacement_list_len,
-                                                                       UChar *src,
-                                                                       int32_t src_len,
-                                                                       UChar *des,
-                                                                       int32_t des_capacity)
-{
-       int i, j;
-       int32_t des_len;
-       UChar *cur_pos;
-
-       if (search_list == NULL || 
-               replacement_list == NULL)
-       {
-               return 0;
-       }
-
-       des_len = 0;
-
-       for (i = 0; i < src_len; i++)
-       {
-               if (des != NULL)
-               {
-                       des[des_len] = src[i];
-               }
-               des_len++;
-
-               if(cur_pos = u_strchr(search_list, src[i]))
-               {
-                       if(cur_pos - search_list > replacement_list_len)
-                               des_len--;
-               }
-       }
-       if (des == NULL || des_len == 0)
-       {
-               return des_len;
-       }
-
-       
-       for (i = 0; i < replacement_list_len; i++)
-       {
-               cur_pos = u_strchr(des, search_list[i]);
-
-               while (cur_pos != NULL)
-               {
-                       *cur_pos = replacement_list[i];
-
-                       //In case cur_pos is the last char in des.
-                       if (cur_pos >= des + des_len)
-                       {
-                               break;
-                       }
-                       cur_pos = u_strchr(cur_pos + 1, search_list[i]);
-               }
-       }
-
-       return des_len;
-
-}
-
-static int32_t regexp_replace(const UChar *regexp,
-                                                         int32_t regexp_len,
-                                                         const UChar *replacement,
-                                                         int32_t replacement_len,
-                                                         UChar *src,
-                                                         int32_t src_len,
-                                                         UChar *des,
-                                                         int32_t des_capacity,
-                                                         int is_global)
-{
-       URegularExpression *regular_exp;
-       UErrorCode status;
-       UParseError pe;
-       int32_t len;
-
-       if (regexp == NULL || replacement == NULL || src == NULL)
-       {
-               return 0;
-       }
-
-       status = 0;
-
-       
-       regular_exp = uregex_open(regexp, regexp_len, 0, &pe, &status);
-       if (regular_exp == NULL)
-       {
-               return 0;
-       }
-
-       status = 0;
-       uregex_setText(regular_exp, src, src_len, &status);
-       
-       status = 0;
-       if (is_global > 0)
-       {
-               len = uregex_replaceAll(regular_exp,
-                                                               replacement,
-                                                               replacement_len,
-                                                               NULL,
-                                                               0,
-                                                               &status);
-               if (des == NULL || des_capacity < len)
-               {
-                       uregex_close(regular_exp);
-                       return len;
-               }
-               status = 0;
-
-               uregex_replaceAll(regular_exp,
-                                                 replacement,
-                                                 replacement_len,
-                                                 des,
-                                                 des_capacity,
-                                                 &status);
-       }
-       else
-       {
-               len = uregex_replaceFirst(regular_exp,
-                                                                 replacement,
-                                                                 replacement_len,
-                                                                 NULL,
-                                                                 0,
-                                                                 &status);
-               if (des == NULL || des_capacity < len)
-               {
-                       uregex_close(regular_exp);
-                       return len;
-               }
-
-               status = 0;
-               uregex_replaceFirst(regular_exp,
-                                                       replacement,
-                                                       replacement_len,
-                                                       des,
-                                                       des_capacity,
-                                                       &status);
-       }
-       
-       uregex_close(regular_exp);
-       return len;
-}
-
-static UChar *u_strtransliterate(UChar *search_list,
-                                                                UChar *replacement_list,
-                                                                UChar *str,
-                                                                int32_t str_capacity)
-{
-       int32_t search_list_len, replacement_list_len, str_len;
-       UChar *des;
-       int32_t des_len;
-
-       if (search_list == NULL || replacement_list == NULL || str == NULL)
-       {
-               return NULL;
-       }
-
-       search_list_len = u_strlen(search_list);
-       replacement_list_len = u_strlen(replacement_list);
-       str_len = u_strlen(str);
-
-       des_len = regexp_transliterate(search_list, 
-                                                                  search_list_len,
-                                                                  replacement_list,
-                                                                  replacement_list_len,
-                                                                  str,
-                                                                  str_len,
-                                                                  NULL,
-                                                                  0);
-       des = palloc((des_len + 1) * sizeof(UChar));
-       des_len = regexp_transliterate(search_list, 
-                                                                       search_list_len,
-                                                                       replacement_list,
-                                                                       replacement_list_len,
-                                                                       str,
-                                                                       str_len,
-                                                                       des,
-                                                                       des_len);
-       if(des != NULL)
-               des[des_len] = '\0';
-
-       pfree(str);
-       return des;
-}
-
-static UChar *u_strreplace(UChar *regexp,
-                              UChar *replacement,
-                                                  UChar *str,
-                                                  int32_t str_capacity,
-                                                  int is_global)
-{
-       int32_t regexp_len, replacement_len, str_len;
-       UChar *des;
-       int32_t des_len;
-
-       if (regexp == NULL || replacement == NULL || str == NULL)
-       {
-               return NULL;
-       }
-
-       regexp_len = u_strlen(regexp);
-       replacement_len = u_strlen(replacement);
-       str_len = u_strlen(str);
-
-       des_len = regexp_replace(regexp,
-                                                        regexp_len,
-                                                        replacement,
-                                                        replacement_len,
-                                                        str,
-                                                        str_len,
-                                                        NULL,
-                                                        0,
-                                                        is_global);
-
-       des = palloc((des_len + 1)* sizeof(UChar));
-
-       des_len = regexp_replace(regexp,
-                                                       regexp_len,
-                                                       replacement,
-                                                       replacement_len,
-                                                       str,
-                                                       str_len,
-                                                       des,
-                                                       des_len,
-                                                       is_global);
-       if(des != NULL)
-               des[des_len] = '\0';
-
-       pfree(str);
-       return des;
-}
-
-static UChar *additional_substitutions(UChar *nustr, int is_search)
-{
-       UChar uregexp[200], replacement[200];
-
-       if (nustr == NULL)
-       {
-               return NULL;
-       }
-
-       u_uastrncpy(uregexp, "\\x{00C6}", sizeof("\\x{00C6}"));
-       u_uastrncpy(replacement, "AE", sizeof("AE"));
-       
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
-
-       u_uastrncpy(uregexp, "\\x{00DE}", sizeof("\\x{00DE}"));
-       u_uastrncpy(replacement, "TH", sizeof("TH"));
-       
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
-
-       u_uastrncpy(uregexp, "\\x{0152}", sizeof("\\x{0152}"));
-       u_uastrncpy(replacement, "OE", sizeof("OE"));
-
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
-
-       if (is_search)
-       {
-               u_strncpy(uregexp, 
-                                       L"\x0110\x00D0\x00D8\x0141\x2113\x02BB\x02BC][",
-                                       sizeof(L"\x0110\x00D0\x00D8\x0141\x2113\x02BB"
-                                                       L"\x02BC][") / sizeof(UChar));
-
-       }
-       else
-       {
-               u_strncpy(uregexp, 
-                               L"\x0110\x00D0\x00D8\x0141\x2113\x02BB\x02BC]['",
-                               sizeof(L"\x0110\x00D0\x00D8\x0141\x2113"
-                                               L"\x02BB\x02BC]['") / sizeof(UChar));
-
-       }
-       u_uastrncpy(replacement, "DDOLl", sizeof("DDOLl"));
-       
-       nustr = u_strtransliterate(uregexp, replacement, nustr, u_strlen(nustr));
-
-       return nustr;
-}
-
-static UChar *transformations_on_unicode(UChar *nustr, UChar *usf)
-{
-       UChar uregexp[200], replacement[200];
-       UChar *comma;
-       int32_t nustr_len;
-
-       if (nustr == NULL)
-       {
-               return NULL;
-       }
-
-       nustr_len = u_strlen(nustr);
-
-       u_uastrncpy(uregexp,
-                               "[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cs}\\p{Lm}\\p{Mc}\\p{Me}\\p{Mn}]",
-                               sizeof("[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cs}\\p{Lm}\\p{Mc}\\p{Me}\\p{Mn}]"));
-       u_uastrncpy(replacement, "", sizeof(""));
-       
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
-
-       if (usf != NULL && usf[0] == 0x61) //0x61 == 'a' in utf16
-       {
-               comma = u_strchr(nustr, 0x2c); //0x2c == ',' in utf16
-               if (comma != NULL)
-               {
-                       if (comma != nustr + nustr_len - 1)
-                       {
-                               u_uastrncpy(uregexp, ",", sizeof(","));
-                               replacement[0] = 0x7;
-                               replacement[1] = 0;
-
-                               nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 0);
-                       }
-               }
-       }
-
-       return nustr;
-}
-
-static UChar *replace_placehoders(UChar *nustr)
-{
-       UChar uregexp[200], replacement[200];
-
-       if (nustr == NULL)
-       {
-               return NULL;
-       }
-
-       u_strncpy(uregexp,
-                               L"+&@\x266D\x266F#",
-                               sizeof(L"+&@\x266D\x266F#") / sizeof(UChar));
-       u_strncpy(replacement,
-                               L"\x01\x02\x03\x04\x05\x06",
-                               sizeof(L"\x01\x02\x03\x04\x05\x06") / sizeof(UChar));
-
-       nustr = u_strtransliterate(uregexp, replacement, nustr, u_strlen(nustr));
-
-       u_uastrncpy(uregexp,
-                               "[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}\\p{Sk}\\p{Sm}\\p{So}\\p{Zl}\\p{Zp}\\p{Zs}]",
-                          sizeof("[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}\\p{Sk}\\p{Sm}\\p{So}\\p{Zl}\\p{Zp}\\p{Zs}]"));
-       u_uastrncpy(replacement, " ", sizeof(" "));
-
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
-
-       u_strncpy(uregexp,
-                       L"\x01\x02\x03\x04\x05\x06\x07",
-                               sizeof(L"\x01\x02\x03\x04\x05\x06\x07") / sizeof(UChar));
-       u_strncpy(replacement,
-                               L"+&@\x266D\x266F#,",
-                               sizeof(L"+&@\x266D\x266F#,") / sizeof(UChar));
-
-       nustr = u_strtransliterate(uregexp, replacement, nustr, u_strlen(nustr));
-
-
-       return nustr;
-}
-
-
-static UChar *decimal_digits(UChar *nustr)
-{
-    UChar *uregexp, *replacement;
-    UChar *expand_exp, *expand_replacement;
-    int32_t uregexp_len, replacement_len;
-    int32_t expand_exp_len, expand_replacement_len;
-
-    if (nustr == NULL)
-    {
-        return NULL;
-    }
-
-    uregexp = L"\x0660-\x0669\x06F0-\x06F9\x07C0-\x07C9\x0966-\x096F"
-        L"\x09E6-\x09EF\x0A66-\x0A6F\x0AE6-\x0AEF\x0B66-\x0B6F"
-        L"\x0BE6-\x0BEF\x0C66-\x0C6F\x0CE6-\x0CEF\x0D66-\x0D6F"
-        L"\x0E50-\x0E59\x0ED0-\x0ED9\x0F20-\x0F29\x1040-\x1049"
-        L"\x1090-\x1099\x17E0-\x17E9\x1810-\x1819\x1946-\x194F"
-        L"\x19D0-\x19D9\x1A80-\x1A89\x1A90-\x1A99\x1B50-\x1B59"
-        L"\x1BB0-\x1BB9\x1C40-\x1C49\x1C50-\x1C59\xA620-\xA629"
-        L"\xA8D0-\xA8D9\xA900-\xA909\xA9D0-\xA9D9\xAA50-\xAA59"
-        L"\xABF0-\xABF9\xFF10-\xFF19";
-
-    uregexp_len = sizeof(L"\x0660-\x0669\x06F0-\x06F9\x07C0-\x07C9\x0966-\x096F"
-        L"\x09E6-\x09EF\x0A66-\x0A6F\x0AE6-\x0AEF\x0B66-\x0B6F"
-        L"\x0BE6-\x0BEF\x0C66-\x0C6F\x0CE6-\x0CEF\x0D66-\x0D6F"
-        L"\x0E50-\x0E59\x0ED0-\x0ED9\x0F20-\x0F29\x1040-\x1049"
-        L"\x1090-\x1099\x17E0-\x17E9\x1810-\x1819\x1946-\x194F"
-        L"\x19D0-\x19D9\x1A80-\x1A89\x1A90-\x1A99\x1B50-\x1B59"
-        L"\x1BB0-\x1BB9\x1C40-\x1C49\x1C50-\x1C59\xA620-\xA629"
-        L"\xA8D0-\xA8D9\xA900-\xA909\xA9D0-\xA9D9\xAA50-\xAA59"
-        L"\xABF0-\xABF9\xFF10-\xFF19") / sizeof(UChar);
-    replacement = L"0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-"
-        L"90-90-90-90-90-90-90-90-90-90-90-90-90-90-9";
-
-    replacement_len = sizeof(L"0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-"
-        L"90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-"
-        L"90-90-9") / sizeof(UChar);
-
-    expand_exp_len = regexp_expand_string(uregexp, uregexp_len, NULL, 0);
-
-    expand_exp = palloc(sizeof(UChar) * (expand_exp_len + 1));
-
-    expand_replacement_len = regexp_expand_string(replacement, replacement_len, NULL, 0);
-
-    expand_replacement = palloc(sizeof(UChar) * (expand_replacement_len + 1));
-
-    regexp_expand_string(uregexp, uregexp_len, expand_exp, expand_exp_len);
-    regexp_expand_string(replacement, replacement_len, expand_replacement, expand_replacement_len);
-
-    expand_exp[expand_exp_len] = '\0';
-    expand_replacement[expand_replacement_len] = '\0';
-
-    nustr = u_strtransliterate(expand_exp, expand_replacement, nustr, u_strlen(nustr));
-
-    pfree(expand_exp);
-    pfree(expand_replacement);
-    return nustr;
-}
-
-static UChar *leading_trailing_spaces(UChar * nustr)
-{
-       UChar uregexp[20], replacement[20];
-
-       if (nustr == NULL)
-       {
-               return NULL;
-       }
-
-       u_uastrncpy(uregexp, "\\s+", sizeof("\\s+"));
-       u_uastrncpy(replacement, " ", sizeof(" "));
-
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
-       
-       u_uastrncpy(uregexp, "^\\s+", sizeof("^\\s+"));
-       u_uastrncpy(replacement, "", sizeof(""));
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 0);
-
-       u_uastrncpy(uregexp, "\\s+$", sizeof("\\s+$"));
-
-       nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
-
-       return nustr;
-
-}
-
-text *normalize(text *str, text *sf, int is_search)
-{
-       UChar *ustr = NULL, *nustr = NULL, *temp = NULL, *usf = NULL;
-       int32_t nustr_len, temp_len, str_len, sf_len, ustr_len;
-       UNormalizer2 *normalizer;
-       char *regexp, *result;
-       char *s;
-       UChar uregexp[200], replacement[200];
-       UErrorCode err = 0;
-
-       if (str == NULL || sf == NULL)
-       {
-               return NULL;
-       }
-
-       normalizer = unorm2_getNFKDInstance(&err);
-       if (U_FAILURE(err))
-       {
-               return NULL;
-       }
-
-       s = VARDATA(str);
-
-       str_len = VARSIZE(str) - VARHDRSZ;
-       sf_len = VARSIZE(sf) - VARHDRSZ;
-       temp = palloc((str_len + 1)* sizeof(UChar));
-       usf = palloc((sf_len + 1) * sizeof(UChar));
-
-
-       temp = u_strFromUTF8(temp, str_len, NULL, s, str_len, &err);
-       if (U_FAILURE(err) || temp == NULL)
-       {
-               str = NULL;
-               goto Fail;
-       }
-       
-       usf = u_strFromUTF8(usf, sf_len, NULL, VARDATA(sf), sf_len, &err);
-       if (usf == NULL)
-       {
-               str = NULL;
-               goto Fail;
-       }
-
-       usf[sf_len] = '\0';
-       //Apply NACO normalization to input string; based on
-       //http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf
-       //
-       //Note that unlike a strict reading of the NACO normalization rules,
-       //output is returned as lowercase instead of uppercase for compatibility
-       //with previous versions of the Evergreen naco_normalize routine.
-       //
-       //Convert to upper-case first; even though final output will be lowercase, doing this will
-       //ensure that the German eszett (?) and certain ligatures (?, ?, ?, etc.) will be handled correctly.
-       //If there are any bugs in Perl's implementation of upcasing, they will be passed through here.
-       
-       ustr_len = u_strToUpper(NULL,
-                               0,
-                               temp,
-                               str_len,
-                               NULL,
-                               &err);
-
-       err = 0;
-       ustr = palloc((ustr_len + 1) * sizeof(UChar));
-       u_strToUpper(ustr, 
-                    ustr_len,
-                    temp,
-                    str_len,
-                    NULL,
-                    &err);
-       pfree(temp);
-       temp = NULL;
-       ustr[ustr_len] = '\0';
-       if (U_FAILURE(err))
-       {
-               str = NULL;
-               goto Fail;
-       }
-
-       u_uastrncpy(uregexp,
-                   "\\x{0098}.*?\\x{009C}",
-                   sizeof("\\x{0098}.*?\\x{009C}"));
-       u_uastrncpy(replacement, "", sizeof(""));
-
-       ustr = u_strreplace(uregexp, replacement, ustr, u_strlen(ustr), 1);
-       ustr_len = u_strlen(ustr);
-
-       nustr_len = unorm2_normalize(normalizer, ustr, ustr_len, NULL, 0, &err);
-       
-       //To store the '\0';
-
-       nustr = palloc((nustr_len + 1)* sizeof(UChar));
-       err = 0;
-
-       unorm2_normalize(normalizer, ustr, ustr_len, nustr, nustr_len, &err);
-       if (U_FAILURE(err))
-       {
-               str = NULL;
-               goto Fail;
-       }
-       nustr[nustr_len] = '\0';
-
-       //additional substitutions - 3.6.
-       nustr = additional_substitutions(nustr, is_search);
-
-       //transformations based on Unicode category codes
-       nustr = transformations_on_unicode(nustr, usf);
-
-       //since we've stripped out the control characters, we can now
-       //use a few as placeholders temporarily
-       nustr = replace_placehoders(nustr);
-
-       //decimal digit
-       nustr = decimal_digits(nustr);
-
-       //intentionally skipping step 8 of the NACO algorithm; if the string
-       //gets normalized away, that's fine.
-
-       //leading and trailing spaces
-       nustr = leading_trailing_spaces(nustr);
-       nustr_len = u_strlen(nustr);
-
-       temp = palloc(nustr_len * sizeof(UChar));
-       u_strToLower(temp, nustr_len, nustr, nustr_len, NULL, &err);
-       temp_len = nustr_len;
-       err = 0;
-
-       u_strToUTF8(NULL, 0, &str_len, temp, temp_len, &err);
-       
-       err = 0;
-       str = (text *)palloc(str_len + VARHDRSZ);
-       SET_VARSIZE(str, str_len + VARHDRSZ);
-
-       result = u_strToUTF8(VARDATA(str), str_len, &str_len, temp, temp_len, &err); 
-       
-       if(result != NULL)
-               result[str_len] = '\0';
-
-       
-Fail:
-       if(temp != NULL)
-               pfree(temp);
-       if(ustr != NULL)
-               pfree(ustr);
-       if(usf != NULL)
-               pfree(usf);
-       if(nustr != NULL)
-               pfree(nustr);
-
-       return str;
-}
-
-PG_FUNCTION_INFO_V1(naco_normalize);
-
-Datum naco_normalize(PG_FUNCTION_ARGS)
-{
-       text *str = PG_GETARG_TEXT_P(0);
-       text *sf = PG_GETARG_TEXT_P(1);
-       
-
-       if (str == NULL || sf == NULL)
-       {
-               PG_RETURN_TEXT_P(NULL);
-       }
-
-       str = normalize(str, sf, 0);
-       
-       if(str != NULL)
-               PG_RETURN_TEXT_P(str);
-       else
-               PG_RETURN_NULL();
-}
-
-PG_FUNCTION_INFO_V1(search_normalize);
-
-Datum search_normalize(PG_FUNCTION_ARGS)
-{
-       text *str = PG_GETARG_TEXT_P(0);
-       text *sf = PG_GETARG_TEXT_P(1);
-
-
-       if (str == NULL || sf == NULL)
-       {
-               PG_RETURN_TEXT_P(NULL);
-       }
-
-       str = normalize(str, sf, 1);
-
-       
-       if(str != NULL)
-               PG_RETURN_TEXT_P(str);
-       else
-               PG_RETURN_NULL();
-}
+/************************************************************************/\r
+/*  C Implementation: public.search_normalize public.naco_normalize\r
+ *\r
+ *  Descritption:\r
+ *    This file implement public.search_normalize and public.naco_normalize.\r
+ *  These two functions is included in the PostgreSQL extension c_functions.\r
+ *  ICU4C and postgres lib is needed to build this file.\r
+ *\r
+ *  Author: Swenyu Duan <dsy88@sina.com>, (C) 2012\r
+ *\r
+ *  Copyright: See COPYING file that comes with this distribution.\r
+ */\r
+/************************************************************************/\r
+#include "postgres.h"\r
+#include "string.h"\r
+#include "fmgr.h"\r
+#include "unicode/unorm2.h"\r
+#include "unicode/utypes.h"\r
+#include "unicode/ustring.h"\r
+#include "unicode/uregex.h"\r
+#include "unicode/umachine.h"\r
+\r
+#ifdef PG_MODULE_MAGIC\r
+PG_MODULE_MAGIC;\r
+#endif\r
+\r
+static int32_t regexp_transliterate(const UChar *search_list,\r
+                                    int32_t search_list_len,\r
+                                    const UChar *replacement_list,\r
+                                    int32_t replacement_list_len,\r
+                                    UChar *src,\r
+                                    int32_t src_len,\r
+                                    UChar *des,\r
+                                    int32_t des_capacity)\r
+{\r
+    int i, j;\r
+    int32_t des_len;\r
+    UChar *cur_pos;\r
+\r
+    if (search_list == NULL || \r
+        replacement_list == NULL ||\r
+        des == NULL ||\r
+        src_len > des_capacity + 1)\r
+    {\r
+        return 0;\r
+    }\r
+\r
+    des_len = 0;\r
+\r
+    for (i = 0; i < src_len; i++)\r
+    {\r
+        if (des != NULL)\r
+        {\r
+            des[des_len] = src[i];\r
+        }\r
+        des_len++;\r
+\r
+        for (j = replacement_list_len; j < search_list_len; j++)\r
+        {\r
+            if (search_list[j] == src[i])\r
+            {\r
+                des_len--;\r
+                break;\r
+            }\r
+        }\r
+    }\r
+    if (des == NULL)\r
+    {\r
+        //To store the tail '\\0'.\r
+        return des_len + 1;\r
+    }\r
+\r
+    des[des_len] = '\0';\r
+    \r
+    for (i = 0; i < replacement_list_len; i++)\r
+    {\r
+        cur_pos = u_strchr(des, search_list[i]);\r
+\r
+        while (cur_pos != NULL)\r
+        {\r
+            *cur_pos = replacement_list[i];\r
+\r
+            //In case cur_pos is the last char in des.\r
+            if (cur_pos >= des + des_len)\r
+            {\r
+                break;\r
+            }\r
+            cur_pos = u_strchr(cur_pos + 1, search_list[i]);\r
+        }\r
+    }\r
+\r
+    return des_len;\r
+}\r
+\r
+static int32_t regexp_replace(const UChar *regexp,\r
+                              int32_t regexp_len,\r
+                              const UChar *replacement,\r
+                              int32_t replacement_len,\r
+                              UChar *src,\r
+                              int32_t src_len,\r
+                              UChar *des,\r
+                              int32_t des_capacity,\r
+                              int is_global)\r
+{\r
+    URegularExpression *regular_exp;\r
+    UErrorCode status;\r
+    UParseError pe;\r
+    int32_t len;\r
+\r
+    if (regexp == NULL || replacement == NULL || src == NULL)\r
+    {\r
+        return 0;\r
+    }\r
+\r
+    regular_exp = uregex_open(regexp, regexp_len, 0, &pe, &status);\r
+    if (regular_exp == NULL)\r
+    {\r
+        return 0;\r
+    }\r
+\r
+    uregex_setText(regular_exp, src, src_len, &status);\r
+\r
+    if (is_global > 0)\r
+    {\r
+        len = uregex_replaceAll(regular_exp,\r
+                                replacement,\r
+                                replacement_len,\r
+                                NULL,\r
+                                0,\r
+                                &status);\r
+        if (des == NULL || des_capacity < len)\r
+        {\r
+            uregex_close(regular_exp);\r
+            return len;\r
+        }\r
+\r
+        uregex_replaceAll(regular_exp,\r
+                          replacement,\r
+                          replacement_len,\r
+                          des,\r
+                          des_capacity,\r
+                          &status);\r
+    }\r
+    else\r
+    {\r
+        len = uregex_replaceFirst(regular_exp,\r
+                                  replacement,\r
+                                  replacement_len,\r
+                                  NULL,\r
+                                  0,\r
+                                  &status);\r
+        if (des == NULL || des_capacity < len)\r
+        {\r
+            uregex_close(regular_exp);\r
+            return len;\r
+        }\r
+\r
+        uregex_replaceFirst(regular_exp,\r
+                            replacement,\r
+                            replacement_len,\r
+                            des,\r
+                            des_capacity,\r
+                            &status);\r
+    }\r
+    \r
+    uregex_close(regular_exp);\r
+    return len;\r
+}\r
+\r
+static UChar *u_strtransliterate(UChar *search_list,\r
+                                 UChar *replacement_list,\r
+                                 UChar *str,\r
+                                 int32_t str_capacity)\r
+{\r
+    int32_t search_list_len, replacement_list_len, str_len;\r
+    UChar *des;\r
+    int32_t des_len;\r
+\r
+    if (search_list == NULL || replacement_list == NULL || str == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    search_list_len = u_strlen(search_list);\r
+    replacement_list_len = u_strlen(replacement_list);\r
+    str_len = u_strlen(str);\r
+\r
+    des_len = regexp_transliterate(search_list, \r
+                                   search_list_len,\r
+                                   replacement_list,\r
+                                   replacement_list_len,\r
+                                   str,\r
+                                   str_len,\r
+                                   NULL,\r
+                                   0);\r
+    des = palloc(des_len * sizeof(UChar));\r
+    des_len = regexp_transliterate(search_list, \r
+                                    search_list_len,\r
+                                    replacement_list,\r
+                                    replacement_list_len,\r
+                                    str,\r
+                                    str_len,\r
+                                    des,\r
+                                    des_len);\r
+\r
+    pfree(str);\r
+    return des;\r
+}\r
+\r
+static UChar *u_strreplace(UChar *regexp,\r
+                           UChar *replacement,\r
+                           UChar *str,\r
+                           int32_t str_capacity,\r
+                           int is_global)\r
+{\r
+    int32_t regexp_len, replacement_len, str_len;\r
+    UChar *des;\r
+    int32_t des_len;\r
+\r
+    if (regexp == NULL || replacement == NULL || str == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    regexp_len = u_strlen(regexp);\r
+    replacement_len = u_strlen(replacement);\r
+    str_len = u_strlen(str);\r
+\r
+    des_len = regexp_replace(regexp,\r
+                             regexp_len,\r
+                             replacement,\r
+                             replacement_len,\r
+                             str,\r
+                             str_len,\r
+                             NULL,\r
+                             0,\r
+                             is_global);\r
+\r
+    des = palloc(des_len * sizeof(UChar));\r
+\r
+    des_len = regexp_replace(regexp,\r
+                            regexp_len,\r
+                            replacement,\r
+                            replacement_len,\r
+                            str,\r
+                            str_len,\r
+                            des,\r
+                            des_len,\r
+                            is_global);\r
+    pfree(str);\r
+    return des;\r
+}\r
+\r
+UChar *additional_substitutions(UChar *nustr, int is_search)\r
+{\r
+    char *regexp;\r
+    UChar uregexp[200], replacement[200];\r
+\r
+    if (nustr == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    regexp = "\\x{00C6}";\r
+    u_uastrncpy(uregexp, regexp, strlen(regexp));\r
+    u_uastrncpy(replacement, "AE", strlen("AE"));\r
+    \r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+\r
+    regexp = "\\x{00DE}";\r
+    u_uastrncpy(uregexp, regexp, strlen(regexp));\r
+    u_uastrncpy(replacement, "TH", strlen("TH"));\r
+    \r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+\r
+    regexp = "\\x{0152}";\r
+    u_uastrncpy(uregexp, regexp, strlen(regexp));\r
+    u_uastrncpy(replacement, "OE", strlen("OE"));\r
+\r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+\r
+    if (is_search)\r
+    {\r
+        regexp = "\\x{0110}\\x{00D0}\\x{00D8}\\x{0141}\\x{2113}\\x{02BB}\\x{02BC}][";\r
+    }\r
+    else\r
+    {\r
+        regexp = "\\x{0110}\\x{00D0}\\x{00D8}\\x{0141}\\x{2113}\\x{02BB}\\x{02BC}]['";\r
+    }\r
+    \r
+    u_uastrncpy(uregexp, regexp, strlen(regexp));\r
+    u_uastrncpy(replacement, "OE", strlen("OE"));\r
+    \r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+\r
+    regexp = "\\x{0110}\\x{00D0}\\x{00D8}\\x{0141}\\x{2113}\\x{02BB}\\x{02BC}]['";\r
+    u_uastrncpy(uregexp, regexp, strlen(regexp));\r
+    u_uastrncpy(replacement, "DDOLl", strlen("DDOLl"));\r
+    \r
+    nustr = u_strtransliterate(uregexp, replacement, nustr, u_strlen(nustr));\r
+\r
+    return nustr;\r
+}\r
+\r
+UChar *transformations_on_unicode(UChar *nustr, UChar *usf)\r
+{\r
+    char *regexp;\r
+    UChar uregexp[200], replacement[200];\r
+    UChar *comma;\r
+    int32_t nustr_len;\r
+\r
+    if (nustr == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    nustr_len = u_strlen(nustr);\r
+\r
+    regexp = "[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cs}\\p{Lm}\\p{Mc}\\p{Me}\\p{Mn}]";\r
+    u_uastrncpy(uregexp, regexp, strlen(regexp));\r
+    u_uastrncpy(replacement, "", strlen(""));\r
+    \r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+\r
+    if (usf != NULL && usf[0] == 0x61) //0x61 == 'a' in utf16\r
+    {\r
+        comma = u_strchr(nustr, 0x2c); //0x2c == ',' in utf16\r
+        if (comma != NULL)\r
+        {\r
+            if (comma != nustr + nustr_len - 1)\r
+            {\r
+                regexp = ",";\r
+                u_uastrncpy(uregexp, regexp, strlen(regexp));\r
+                replacement[0] = 0x7;\r
+                replacement[1] = 0;\r
+\r
+                nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 0);\r
+            }\r
+        }\r
+    }\r
+\r
+    return nustr;\r
+}\r
+\r
+UChar *replace_placehoders(UChar *nustr)\r
+{\r
+    UChar uregexp[200], replacement[200];\r
+\r
+    if (nustr == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    u_uastrncpy(uregexp,\r
+                "+&@\\x{266D}\\x{266F}#",\r
+                strlen( "+&@\\x{266D}\\x{266F}#"));\r
+    u_uastrncpy(replacement,\r
+                "\\x01\\x02\\x03\\x04\\x05\\x06",\r
+                strlen("\\x01\\x02\\x03\\x04\\x05\\x06"));\r
+\r
+    nustr = u_strtransliterate(uregexp, replacement, nustr, u_strlen(nustr));\r
+\r
+    u_uastrncpy(uregexp,\r
+                "[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}\\p{Sk}\\p{Sm}\\p{So}\\p{Zl}\\p{Zp}\\p{Zs}]",\r
+               strlen( "[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\\\p{Pi}\\p{Po}\\p{Ps}\\p{Sk}\\p{Sm}\\p{So}\\p{Zl}\\p{Zp}\\p{Zs}]"));\r
+    u_uastrncpy(replacement, "", strlen(""));\r
+\r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+\r
+    u_uastrncpy(uregexp,\r
+                "\\x01\\x02\\x03\\x04\\x05\\x06\\x07",\r
+                strlen( "\\x01\\x02\\x03\\x04\\x05\\x06\\x07"));\r
+    u_uastrncpy(replacement,\r
+                "+&@\\x{266D}\\x{266F}#,",\r
+                strlen("+&@\\x{266D}\\x{266F}#,"));\r
+\r
+    nustr = u_strtransliterate(uregexp, replacement, nustr, u_strlen(nustr));\r
+\r
+\r
+    return nustr;\r
+}\r
+\r
+UChar *decimal_digits(UChar *nustr)\r
+{\r
+    UChar uregexp[300], replacement[300];\r
+\r
+    if (nustr == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+    \r
+    u_uastrncpy(uregexp,\r
+                "\\x{0660}-\\x{0669}\\x{06F0}-\\x{06F9}\\x{07C0}-\\x{07C9}\\x{0966}-\\x{096F}"\r
+                "\\x{09E6}-\\x{09EF}\\x{0A66}-\\x{0A6F}\\x{0AE6}-\\x{0AEF}\\x{0B66}-\\x{0B6F}"\r
+                "\\x{0BE6}-\\x{0BEF}\\x{0C66}-\\x{0C6F}\\x{0CE6}-\\x{0CEF}\\x{0D66}-\\x{0D6F}"\r
+                "\\x{0E50}-\\x{0E59}\\x{0ED0}-\\x{0ED9}\\x{0F20}-\\x{0F29}\\x{1040}-\\x{1049}"\r
+                "\\x{1090}-\\x{1099}\\x{17E0}-\\x{17E9}\\x{1810}-\\x{1819}\\x{1946}-\\x{194F}"\r
+                "\\x{19D0}-\\x{19D9}\\x{1A80}-\\x{1A89}\\x{1A90}-\\x{1A99}\\x{1B50}-\\x{1B59}"\r
+                "\\x{1BB0}-\\x{1BB9}\\x{1C40}-\\x{1C49}\\x{1C50}-\\x{1C59}\\x{A620}-\\x{A629}"\r
+                "\\x{A8D0}-\\x{A8D9}\\x{A900}-\\x{A909}\\x{A9D0}-\\x{A9D9}\\x{AA50}-\\x{AA59}"\r
+                "\\x{ABF0}-\\x{ABF9}\\x{FF10}-\\x{FF19}",\r
+                strlen("\\x{0660}-\\x{0669}\\x{06F0}-\\x{06F9}\\x{07C0}-\\x{07C9}\\x{0966}-\\x{096F}"\r
+                    "\\x{09E6}-\\x{09EF}\\x{0A66}-\\x{0A6F}\\x{0AE6}-\\x{0AEF}\\x{0B66}-\\x{0B6F}"\r
+                    "\\x{0BE6}-\\x{0BEF}\\x{0C66}-\\x{0C6F}\\x{0CE6}-\\x{0CEF}\\x{0D66}-\\x{0D6F}"\r
+                    "\\x{0E50}-\\x{0E59}\\x{0ED0}-\\x{0ED9}\\x{0F20}-\\x{0F29}\\x{1040}-\\x{1049}"\r
+                    "\\x{1090}-\\x{1099}\\x{17E0}-\\x{17E9}\\x{1810}-\\x{1819}\\x{1946}-\\x{194F}"\r
+                    "\\x{19D0}-\\x{19D9}\\x{1A80}-\\x{1A89}\\x{1A90}-\\x{1A99}\\x{1B50}-\\x{1B59}"\r
+                    "\\x{1BB0}-\\x{1BB9}\\x{1C40}-\\x{1C49}\\x{1C50}-\\x{1C59}\\x{A620}-\\x{A629}"\r
+                    "\\x{A8D0}-\\x{A8D9}\\x{A900}-\\x{A909}\\x{A9D0}-\\x{A9D9}\\x{AA50}-\\x{AA59}"\r
+                    "\\x{ABF0}-\\x{ABF9}\\x{FF10}-\\x{FF19}"));\r
+\r
+    u_uastrncpy(replacement,\r
+                "0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-9",\r
+                strlen("0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-9"));\r
+\r
+    nustr = u_strtransliterate(uregexp, replacement, nustr, u_strlen(nustr));\r
+\r
+    return nustr;\r
+}\r
+\r
+UChar *leading_trailing_spaces(UChar * nustr)\r
+{\r
+    UChar uregexp[200], replacement[200];\r
+\r
+    if (nustr == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    u_uastrncpy(uregexp, "\\s+",       strlen( "\\s+"));\r
+    u_uastrncpy(replacement, " ", strlen(" "));\r
+\r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+    \r
+    u_uastrncpy(uregexp, "^\\s+",      strlen( "^\\s+"));\r
+    u_uastrncpy(replacement, "", strlen(""));\r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 0);\r
+\r
+    u_uastrncpy(uregexp, "\\s+$",      strlen( "\\s+$"));\r
+\r
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);\r
+\r
+    return nustr;\r
+\r
+}\r
+\r
+/*\r
+ * Apply NACO normalization to a text value; based on\r
+ * http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf\r
+ *\r
+ * Parameters:\r
+ *   str       - value to normalize; its payload is decoded as UTF-8.\r
+ *   sf        - subfield code; its payload is decoded as UTF-8 and handed\r
+ *               to transformations_on_unicode().\r
+ *   is_search - forwarded to additional_substitutions().\r
+ *\r
+ * Returns a newly palloc'd text value, or NULL when an argument is NULL or\r
+ * any ICU call reports a failure.  Neither str nor sf is modified.\r
+ *\r
+ * Note that unlike a strict reading of the NACO normalization rules,\r
+ * output is returned as lowercase instead of uppercase for compatibility\r
+ * with previous versions of the Evergreen naco_normalize routine.\r
+ */\r
+text *normalize(text *str, text *sf, int is_search)\r
+{\r
+    UChar *ustr = NULL, *nustr = NULL, *temp = NULL, *usf = NULL;\r
+    text *result = NULL;\r
+    int32_t str_bytes, sf_bytes, len, out_len;\r
+    const UNormalizer2 *normalizer;\r
+    const char *regexp;\r
+    UChar uregexp[200], replacement[200];\r
+    UErrorCode err = U_ZERO_ERROR; //ICU calls do nothing if this starts as a failure\r
+\r
+    if (str == NULL || sf == NULL)\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    normalizer = unorm2_getNFKDInstance(&err);\r
+    if (U_FAILURE(err))\r
+    {\r
+        return NULL;\r
+    }\r
+\r
+    //VARSIZE() counts the length header; only the payload is text.\r
+    str_bytes = VARSIZE(str) - VARHDRSZ;\r
+    sf_bytes = VARSIZE(sf) - VARHDRSZ;\r
+\r
+    //UTF-8 to UTF-16 never yields more code units than there are input\r
+    //bytes, so payload length + 1 always leaves room for the terminator.\r
+    temp = palloc((str_bytes + 1) * sizeof(UChar));\r
+    u_strFromUTF8(temp, str_bytes + 1, &len, VARDATA(str), str_bytes, &err);\r
+    if (U_FAILURE(err))\r
+    {\r
+        goto Done;\r
+    }\r
+    temp[len] = 0;\r
+\r
+    usf = palloc((sf_bytes + 1) * sizeof(UChar));\r
+    u_strFromUTF8(usf, sf_bytes + 1, &len, VARDATA(sf), sf_bytes, &err);\r
+    if (U_FAILURE(err))\r
+    {\r
+        goto Done;\r
+    }\r
+    usf[len] = 0;\r
+\r
+    //Convert to upper-case first; even though final output will be lowercase, doing this will\r
+    //ensure that the German eszett (U+00DF) and certain ligatures (U+FB00, U+FB01, U+FB04, etc.)\r
+    //will be handled correctly.  Upper-casing can lengthen the string, so\r
+    //ask ICU for the required size before allocating.\r
+    len = u_strToUpper(NULL, 0, temp, -1, NULL, &err);\r
+    if (U_FAILURE(err) && err != U_BUFFER_OVERFLOW_ERROR)\r
+    {\r
+        goto Done;\r
+    }\r
+    err = U_ZERO_ERROR;\r
+\r
+    ustr = palloc((len + 1) * sizeof(UChar));\r
+    u_strToUpper(ustr, len + 1, temp, -1, NULL, &err);\r
+    if (U_FAILURE(err))\r
+    {\r
+        goto Done;\r
+    }\r
+    pfree(temp);\r
+    temp = NULL;\r
+\r
+    //Drop everything between U+0098 and U+009C, markers included.\r
+    regexp = "\\x{0098}.*?\\x{009C}";\r
+    u_uastrncpy(uregexp, regexp, strlen(regexp) + 1);\r
+    replacement[0] = 0;\r
+\r
+    //u_strreplace() frees the buffer it is given and returns a new one.\r
+    ustr = u_strreplace(uregexp, replacement, ustr, u_strlen(ustr), 1);\r
+    if (ustr == NULL)\r
+    {\r
+        goto Done;\r
+    }\r
+\r
+    //NFKD can lengthen the string as well: size first, then normalize.\r
+    len = unorm2_normalize(normalizer, ustr, -1, NULL, 0, &err);\r
+    if (U_FAILURE(err) && err != U_BUFFER_OVERFLOW_ERROR)\r
+    {\r
+        goto Done;\r
+    }\r
+    err = U_ZERO_ERROR;\r
+\r
+    nustr = palloc((len + 1) * sizeof(UChar));\r
+    unorm2_normalize(normalizer, ustr, -1, nustr, len + 1, &err);\r
+    if (U_FAILURE(err))\r
+    {\r
+        goto Done;\r
+    }\r
+    pfree(ustr);\r
+    ustr = NULL;\r
+\r
+    //Each step below consumes its input buffer and returns a new one, and\r
+    //returns NULL when handed NULL, so one check after the chain suffices.\r
+\r
+    //additional substitutions - 3.6.\r
+    nustr = additional_substitutions(nustr, is_search);\r
+\r
+    //transformations based on Unicode category codes\r
+    nustr = transformations_on_unicode(nustr, usf);\r
+\r
+    //since we've stripped out the control characters, we can now\r
+    //use a few as placeholders temporarily\r
+    nustr = replace_placehoders(nustr);\r
+\r
+    //decimal digit\r
+    nustr = decimal_digits(nustr);\r
+\r
+    //intentionally skipping step 8 of the NACO algorithm; if the string\r
+    //gets normalized away, that's fine.\r
+\r
+    //leading and trailing spaces\r
+    nustr = leading_trailing_spaces(nustr);\r
+    if (nustr == NULL)\r
+    {\r
+        goto Done;\r
+    }\r
+\r
+    //Lower-case for the final output, again sizing first.\r
+    len = u_strToLower(NULL, 0, nustr, -1, NULL, &err);\r
+    if (U_FAILURE(err) && err != U_BUFFER_OVERFLOW_ERROR)\r
+    {\r
+        goto Done;\r
+    }\r
+    err = U_ZERO_ERROR;\r
+\r
+    temp = palloc((len + 1) * sizeof(UChar));\r
+    len = u_strToLower(temp, len + 1, nustr, -1, NULL, &err);\r
+    if (U_FAILURE(err))\r
+    {\r
+        goto Done;\r
+    }\r
+\r
+    //Measure the UTF-8 form, then build the result value.\r
+    u_strToUTF8(NULL, 0, &out_len, temp, len, &err);\r
+    if (U_FAILURE(err) && err != U_BUFFER_OVERFLOW_ERROR)\r
+    {\r
+        goto Done;\r
+    }\r
+    err = U_ZERO_ERROR;\r
+\r
+    //The length header belongs on the new value, not on the input, and a\r
+    //text payload carries no terminator, so nothing is written past out_len.\r
+    result = (text *)palloc(out_len + VARHDRSZ);\r
+    SET_VARSIZE(result, out_len + VARHDRSZ);\r
+\r
+    u_strToUTF8(VARDATA(result), out_len, NULL, temp, len, &err);\r
+    if (U_FAILURE(err))\r
+    {\r
+        pfree(result);\r
+        result = NULL;\r
+    }\r
+\r
+Done:\r
+    //pfree() must not be handed NULL, so release only what is still owned.\r
+    if (temp != NULL)\r
+    {\r
+        pfree(temp);\r
+    }\r
+    if (ustr != NULL)\r
+    {\r
+        pfree(ustr);\r
+    }\r
+    if (nustr != NULL)\r
+    {\r
+        pfree(nustr);\r
+    }\r
+    if (usf != NULL)\r
+    {\r
+        pfree(usf);\r
+    }\r
+\r
+    return result;\r
+}\r
+\r
+PG_FUNCTION_INFO_V1(naco_normalize);\r
+\r
+Datum naco_normalize(PG_FUNCTION_ARGS)\r
+{\r
+    text *str = PG_GETARG_TEXT_P(0);\r
+    text *sf = PG_GETARG_TEXT_P(1);\r
+    \r
+\r
+    if (str == NULL || sf == NULL)\r
+    {\r
+        PG_RETURN_TEXT_P(NULL);\r
+    }\r
+\r
+    str = normalize(str, sf, 0);\r
+    \r
+    PG_RETURN_TEXT_P(str);\r
+}\r
+\r
+PG_FUNCTION_INFO_V1(search_normalize);\r
+\r
+/*\r
+ * SQL-callable entry point: search_normalize(TEXT, TEXT) RETURNS TEXT.\r
+ *\r
+ * Passes both arguments to normalize() with is_search = 1 and returns its\r
+ * result.  Returns SQL NULL when either argument is NULL or when\r
+ * normalize() returns NULL.\r
+ */\r
+Datum search_normalize(PG_FUNCTION_ARGS)\r
+{\r
+    text *str;\r
+    text *sf;\r
+    text *normalized;\r
+\r
+    //Test for NULL before fetching: the fetch macros dereference the datum.\r
+    if (PG_ARGISNULL(0) || PG_ARGISNULL(1))\r
+    {\r
+        PG_RETURN_NULL();\r
+    }\r
+\r
+    str = PG_GETARG_TEXT_P(0);\r
+    sf = PG_GETARG_TEXT_P(1);\r
+\r
+    normalized = normalize(str, sf, 1);\r
+\r
+    //A NULL pointer has to be reported as SQL NULL; returning it as a text\r
+    //datum would leave the null flag unset.\r
+    if (normalized == NULL)\r
+    {\r
+        PG_RETURN_NULL();\r
+    }\r
+\r
+    PG_RETURN_TEXT_P(normalized);\r
+}\r
+\r
diff --git a/Open-ILS/src/sql/Pg/extensions/xml.functions_in_c.c b/Open-ILS/src/sql/Pg/extensions/xml.functions_in_c.c

deleted file mode 100755 (executable)

index cbba682..0000000
--- a/Open-ILS/src/sql/Pg/extensions/xml.functions_in_c.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/************************************************************************/
-/*  C Implementation: oils.xslt.process
- *
- *  Description:
- *    This file implements oils.xslt.process.
- *    The function is included in the PostgreSQL extension c_functions.
- *  libxml2, libxslt and the PostgreSQL libraries are needed to build this file.
- *
- *  Author: Swenyu Duan <dsy88@sina.com>, (C) 2012
- *
- *  Copyright: See COPYING file that comes with this distribution.
- */
-/************************************************************************/
-#include "postgres.h"
-#include "fmgr.h"
-#include "stdio.h"
-#include "libxml/tree.h"
-#include "libxml/parser.h"
-#include "libxslt/xslt.h"
-#include "libxslt/xsltInternals.h"
-#include "libxslt/transform.h"
-
-PG_FUNCTION_INFO_V1(oils_xslt_process);
-
-/*
- * SQL-callable entry point: apply an XSLT stylesheet to an XML document.
- *
- * Argument 0 is the document and argument 1 the stylesheet, both parsed
- * from the text payload with xmlParseMemory().  Returns the serialized
- * transformation result as text, or SQL NULL when an argument is empty or
- * any parsing/transformation step fails.
- */
-Datum oils_xslt_process(PG_FUNCTION_ARGS)
-{
-    text *doc = PG_GETARG_TEXT_P(0);
-    text *xslt = PG_GETARG_TEXT_P(1);
-    text *processed_doc = NULL;
-    xmlDocPtr parsed_doc = NULL;
-    xmlDocPtr parsed_xslt = NULL;
-    xmlDocPtr transformed = NULL;
-    xmlChar *result = NULL;
-    xsltStylesheetPtr style_sheet = NULL;
-    int doc_len;
-    int xslt_len;
-    int result_len = 0;
-
-    //Check the pointers before reading their length headers.
-    if (doc == NULL || xslt == NULL)
-    {
-        PG_RETURN_NULL();
-    }
-
-    //VARSIZE() counts the length header; only the payload is XML.
-    doc_len = VARSIZE(doc) - VARHDRSZ;
-    xslt_len = VARSIZE(xslt) - VARHDRSZ;
-    if (doc_len <= 0 || xslt_len <= 0)
-    {
-        PG_RETURN_NULL();
-    }
-
-    parsed_doc = xmlParseMemory(VARDATA(doc), doc_len);
-    if (parsed_doc == NULL)
-    {
-        goto Done;
-    }
-
-    parsed_xslt = xmlParseMemory(VARDATA(xslt), xslt_len);
-    if (parsed_xslt == NULL)
-    {
-        goto Done;
-    }
-
-    style_sheet = xsltParseStylesheetDoc(parsed_xslt);
-    if (style_sheet == NULL)
-    {
-        goto Done;
-    }
-    //The stylesheet now owns the parsed document and frees it itself.
-    parsed_xslt = NULL;
-
-    //Keep the source document in its own variable so both can be freed.
-    transformed = xsltApplyStylesheet(style_sheet, parsed_doc, NULL);
-    if (transformed == NULL)
-    {
-        goto Done;
-    }
-
-    xmlDocDumpMemory(transformed, &result, &result_len);
-    if (result == NULL || result_len < 0)
-    {
-        goto Done;
-    }
-
-    processed_doc = palloc(result_len + VARHDRSZ);
-
-    //The stored size includes the header, matching the allocation.
-    SET_VARSIZE(processed_doc, result_len + VARHDRSZ);
-    //Copy the result.
-    memcpy(VARDATA(processed_doc), result, result_len);
-
-Done:
-    //Release everything libxml2/libxslt allocated, on success and failure.
-    if (result != NULL)
-    {
-        xmlFree(result);
-    }
-    if (transformed != NULL)
-    {
-        xmlFreeDoc(transformed);
-    }
-    if (style_sheet != NULL)
-    {
-        xsltFreeStylesheet(style_sheet);
-    }
-    if (parsed_xslt != NULL)
-    {
-        xmlFreeDoc(parsed_xslt);
-    }
-    if (parsed_doc != NULL)
-    {
-        xmlFreeDoc(parsed_doc);
-    }
-
-    if (processed_doc == NULL)
-    {
-        PG_RETURN_NULL();
-    }
-
-    PG_RETURN_TEXT_P(processed_doc);
-}
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.create_extension.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.create_extension.sql

deleted file mode 100644 (file)

index b42e790..0000000
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.create_extension.sql
+++ /dev/null
@@ -1,4 +0,0 @@
-DROP FUNCTION public.naco_normalize( TEXT, TEXT);
-DROP FUNCTION public.search_normalize( TEXT, TEXT);
-
-CREATE EXTENSION c_functions;
diff --git a/Open-ILS/tests/naco_normalize.t b/Open-ILS/tests/naco_normalize.t

index 25a27c0..182ebab 100644 (file)
--- a/Open-ILS/tests/naco_normalize.t
+++ b/Open-ILS/tests/naco_normalize.t
@@ -19,7 +19,7 @@ use OpenILS::Utils::Normalize qw( naco_normalize );
  
  # Database connection parameters
  my $db_driver = 'Pg';
-my $db_host   = 'localhost';
+my $db_host   = 'evergreen';
  my $db_port   = '5432';
  my $db_name   = 'evergreen';
  my $db_user   = 'evergreen';
author	Swenyu Duan <dsy@sina.com>
	Sat, 21 Jul 2012 02:31:45 +0000 (22:31 -0400)
committer	Swenyu Duan <dsy@sina.com>
	Sat, 21 Jul 2012 02:31:45 +0000 (22:31 -0400)
Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql		patch \| blob \| history
Open-ILS/src/sql/Pg/extensions/makefile		patch \| blob \| history
Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c		patch \| blob \| history
Open-ILS/src/sql/Pg/extensions/xml.functions_in_c.c	[deleted file]	patch \| blob \| history
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.create_extension.sql	[deleted file]	patch \| blob \| history
Open-ILS/tests/naco_normalize.t		patch \| blob \| history