From 20614d25425ea9defb1678456e8ba46a744a75dc Mon Sep 17 00:00:00 2001
From: Swenyu Duan <dsy@sina.com>
Date: Thu, 31 May 2012 05:02:45 -0400
Subject: [PATCH] Implement search_normalize and naco_normalize in C.

This is the first extension for PostgreSQL.
The library has not been tested yet.
normalize.functions_in_c.c relies on the ICU4C library. The makefile links
normalize.functions_in_c.o against the ICU4C libraries (/usr/lib) and the
PostgreSQL library. c_functions--1.0.sql replaces the original plperlu
versions of search_normalize and naco_normalize with C versions.
---
 .../src/sql/Pg/extensions/c_functions--1.0.sql     |   9 +
 Open-ILS/src/sql/Pg/extensions/c_functions.control |   3 +
 Open-ILS/src/sql/Pg/extensions/makefile            |  10 +
 .../sql/Pg/extensions/normalize.functions_in_c.c   | 608 +++++++++++++++++++++
 4 files changed, 630 insertions(+)
 create mode 100755 Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql
 create mode 100755 Open-ILS/src/sql/Pg/extensions/c_functions.control
 create mode 100644 Open-ILS/src/sql/Pg/extensions/makefile
 create mode 100755 Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c

diff --git a/Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql b/Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql
new file mode 100755
index 0000000000..5f3049d642
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/extensions/c_functions--1.0.sql
@@ -0,0 +1,9 @@
+CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT -- C entry point calls normalize() with is_search = 1
+	AS 'c_functions.so', 'search_normalize' -- library file, then the C symbol to call
+	LANGUAGE C STRICT IMMUTABLE;	-- STRICT: NULL in, NULL out without entering C
+	-- Same signature and options; binds the naco_normalize symbol (is_search = 0).
+CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT 
+	AS 'c_functions.so', 'naco_normalize'
+	LANGUAGE C STRICT IMMUTABLE;
+
+
diff --git a/Open-ILS/src/sql/Pg/extensions/c_functions.control b/Open-ILS/src/sql/Pg/extensions/c_functions.control
new file mode 100755
index 0000000000..3c25035b27
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/extensions/c_functions.control
@@ -0,0 +1,3 @@
+comment = 'Extensions to convert all the plperlu functions into C functions'
+default_version = '1.0'
+relocatable = true
diff --git a/Open-ILS/src/sql/Pg/extensions/makefile b/Open-ILS/src/sql/Pg/extensions/makefile
new file mode 100644
index 0000000000..8aa4ad3bf2
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/extensions/makefile
@@ -0,0 +1,10 @@
+# PGXS build for the c_functions extension (c_functions.so + install script).
+MODULE_big = c_functions
+EXTENSION = c_functions
+DATA = c_functions--1.0.sql
+OBJS = normalize.functions_in_c.o
+# Only uregex (icui18n) and unorm2/ustring (icuuc, backed by icudata) are used.
+SHLIB_LINK = -licui18n -licuuc -licudata
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
diff --git a/Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c b/Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c
new file mode 100755
index 0000000000..26e48e0db6
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/extensions/normalize.functions_in_c.c
@@ -0,0 +1,608 @@
+/************************************************************************/
+/*  C Implementation: public.search_normalize public.naco_normalize
+ *
+ *  Description:
+ *    This file implements public.search_normalize and public.naco_normalize.
+ *  These two functions are included in the PostgreSQL extension c_functions.
+ *  The ICU4C and PostgreSQL libraries are needed to build this file.
+ *
+ *  Author: Swenyu Duan <dsy88@sina.com>, (C) 2012
+ *
+ *  Copyright: See COPYING file that comes with this distribution.
+ */
+/************************************************************************/
+#include "postgres.h"
+#include "string.h"
+#include "fmgr.h"
+#include "unicode/unorm2.h"
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/uregex.h"
+#include "unicode/umachine.h"
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+/*
+ * Emulate Perl's tr/SEARCH/REPLACEMENT/d on UTF-16 code units.
+ *
+ * Every unit of src is looked up in search_list (first match wins).  A match
+ * at index j < replacement_list_len emits replacement_list[j]; a match at a
+ * higher index drops the unit; anything else is copied unchanged.  The mapping
+ * is one pass, so an emitted replacement is never looked up again.
+ *
+ * Returns the capacity in UChars (terminating NUL included) needed for the
+ * result when des is NULL, otherwise the number of UChars written (the NUL is
+ * appended but not counted).  Returns 0 when an input pointer is NULL, a
+ * length is negative, or des is too small (des then holds "" if it can).
+ */
+static int32_t regexp_transliterate(const UChar *search_list,
+                                    int32_t search_list_len,
+                                    const UChar *replacement_list,
+                                    int32_t replacement_list_len,
+                                    UChar *src,
+                                    int32_t src_len,
+                                    UChar *des,
+                                    int32_t des_capacity)
+{
+    int32_t i, j;
+    int32_t des_len = 0;
+
+    if (search_list == NULL || replacement_list == NULL || src == NULL ||
+        search_list_len < 0 || replacement_list_len < 0 || src_len < 0)
+    {
+        return 0;
+    }
+
+    for (i = 0; i < src_len; i++)
+    {
+        /* Index of src[i] in search_list, or search_list_len if absent. */
+        for (j = 0; j < search_list_len && search_list[j] != src[i]; j++)
+        {
+        }
+        if (j < search_list_len && j >= replacement_list_len)
+        {
+            continue;   /* listed without a replacement: delete */
+        }
+        if (des != NULL)
+        {
+            if (des_len + 1 >= des_capacity)
+            {
+                break;  /* no room for this unit plus the NUL */
+            }
+            des[des_len] = (j < search_list_len) ? replacement_list[j] : src[i];
+        }
+        des_len++;
+    }
+
+    if (des == NULL)
+    {
+        return des_len + 1;     /* size query: leave room for the NUL */
+    }
+    if (i < src_len || des_len >= des_capacity)
+    {
+        if (des_capacity > 0)
+        {
+            des[0] = 0;
+        }
+        return 0;
+    }
+    des[des_len] = 0;
+    return des_len;
+}
+
+/*
+ * Run an ICU regular-expression replace over src.
+ *
+ * is_global > 0 replaces every match, otherwise only the first one.  des may
+ * be NULL to only size the result.  replacement is handed to ICU as-is, so
+ * ICU's replacement-text syntax applies to it.
+ *
+ * Returns the number of UChars written to des (a terminating NUL is written
+ * but not counted).  When des is NULL or cannot hold the result plus its NUL,
+ * nothing is written and the required capacity (result length + 1) is
+ * returned instead.  Returns 0 if an argument is NULL or ICU reports an error.
+ */
+static int32_t regexp_replace(const UChar *regexp,
+                              int32_t regexp_len,
+                              const UChar *replacement,
+                              int32_t replacement_len,
+                              UChar *src,
+                              int32_t src_len,
+                              UChar *des,
+                              int32_t des_capacity,
+                              int is_global)
+{
+    URegularExpression *regular_exp;
+    /* ICU calls do nothing when the incoming status is already a failure. */
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError pe;
+    int32_t len;
+
+    if (regexp == NULL || replacement == NULL || src == NULL)
+    {
+        return 0;
+    }
+
+    regular_exp = uregex_open(regexp, regexp_len, 0, &pe, &status);
+    if (regular_exp == NULL || U_FAILURE(status))
+    {
+        return 0;
+    }
+
+    uregex_setText(regular_exp, src, src_len, &status);
+
+    /* Preflight: a zero-capacity call reports the length via overflow. */
+    len = (is_global > 0)
+        ? uregex_replaceAll(regular_exp, replacement, replacement_len,
+                            NULL, 0, &status)
+        : uregex_replaceFirst(regular_exp, replacement, replacement_len,
+                              NULL, 0, &status);
+    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+    {
+        uregex_close(regular_exp);
+        return 0;
+    }
+    if (des == NULL || des_capacity <= len)
+    {
+        uregex_close(regular_exp);
+        return len + 1;
+    }
+
+    /* Clear the overflow status and rewind before the real pass. */
+    status = U_ZERO_ERROR;
+    uregex_reset(regular_exp, 0, &status);
+    len = (is_global > 0)
+        ? uregex_replaceAll(regular_exp, replacement, replacement_len,
+                            des, des_capacity, &status)
+        : uregex_replaceFirst(regular_exp, replacement, replacement_len,
+                              des, des_capacity, &status);
+    uregex_close(regular_exp);
+    if (U_FAILURE(status))
+    {
+        des[0] = 0;
+        return 0;
+    }
+    return len;
+}
+
+/*
+ * Transliterate str through regexp_transliterate() into a fresh buffer.
+ *
+ * search_list and replacement_list are NUL-terminated UTF-16 strings.  str
+ * must be a NUL-terminated, palloc'd string; it is pfree()d here.
+ * str_capacity is kept for interface compatibility and is not used.
+ *
+ * Returns a palloc'd, NUL-terminated result owned by the caller, or NULL
+ * (leaving str untouched) when an argument is NULL.
+ */
+static UChar *u_strtransliterate(UChar *search_list,
+                                 UChar *replacement_list,
+                                 UChar *str,
+                                 int32_t str_capacity)
+{
+    int32_t search_list_len, replacement_list_len, str_len;
+    int32_t des_capacity;
+    UChar *des;
+
+    if (search_list == NULL || replacement_list == NULL || str == NULL)
+    {
+        return NULL;
+    }
+
+    search_list_len = u_strlen(search_list);
+    replacement_list_len = u_strlen(replacement_list);
+    str_len = u_strlen(str);
+
+    /* The result is never longer than the input: units map 1:1 or vanish. */
+    des_capacity = str_len + 1;
+    des = palloc(des_capacity * sizeof(UChar));
+    des[0] = 0;
+    regexp_transliterate(search_list, search_list_len,
+                         replacement_list, replacement_list_len,
+                         str, str_len, des, des_capacity);
+
+    pfree(str);
+    return des;
+}
+
+/*
+ * Apply regexp_replace() to str and return the result in a fresh buffer.
+ *
+ * regexp and replacement are NUL-terminated UTF-16 strings.  str must be a
+ * NUL-terminated, palloc'd string; it is pfree()d here.  is_global is passed
+ * through to regexp_replace().  str_capacity is kept for interface
+ * compatibility and is not used.
+ *
+ * Returns a palloc'd, NUL-terminated result owned by the caller, or NULL
+ * (leaving str untouched) when an argument is NULL.
+ */
+static UChar *u_strreplace(UChar *regexp,
+                           UChar *replacement,
+                           UChar *str,
+                           int32_t str_capacity,
+                           int is_global)
+{
+    int32_t regexp_len, replacement_len, str_len;
+    int32_t des_capacity, des_len;
+    UChar *des;
+
+    if (regexp == NULL || replacement == NULL || str == NULL)
+    {
+        return NULL;
+    }
+
+    regexp_len = u_strlen(regexp);
+    replacement_len = u_strlen(replacement);
+    str_len = u_strlen(str);
+
+    /* First call sizes the result; keep one extra UChar for the NUL. */
+    des_capacity = regexp_replace(regexp, regexp_len, replacement, replacement_len,
+                                  str, str_len, NULL, 0, is_global) + 1;
+    des = palloc(des_capacity * sizeof(UChar));
+    des_len = regexp_replace(regexp, regexp_len, replacement, replacement_len,
+                             str, str_len, des, des_capacity, is_global);
+
+    /* Terminate explicitly; callers run u_strlen() on the result. */
+    des[(des_len >= 0 && des_len < des_capacity) ? des_len : 0] = 0;
+
+    pfree(str);
+    return des;
+}
+
+/*
+ * NACO "additional substitutions" step (3.6).
+ *
+ * Expands U+00C6 to "AE", U+00DE to "TH" and U+0152 to "OE", then maps
+ * U+0110, U+00D0, U+00D8, U+0141 and U+2113 to D, D, O, L and l and deletes
+ * U+02BB, U+02BC, ']' and '['.  An apostrophe is deleted as well unless
+ * is_search is non-zero.
+ *
+ * nustr must be a NUL-terminated, palloc'd string.  Each helper call frees
+ * its input and returns a new palloc'd string; the last one is returned.
+ * Returns NULL if nustr is NULL.
+ */
+UChar *additional_substitutions(UChar *nustr, int is_search)
+{
+    /*
+     * Patterns and lists are spelled as UTF-16 code units: u_uastrncpy()
+     * does not interpret "\x{...}" escapes, and with n == strlen() it leaves
+     * the destination without a terminating NUL.
+     */
+    UChar ae[] = { 0x00C6, 0 };
+    UChar th[] = { 0x00DE, 0 };
+    UChar oe[] = { 0x0152, 0 };
+    UChar search_list[] = { 0x0110, 0x00D0, 0x00D8, 0x0141, 0x2113,
+                            0x02BB, 0x02BC, 0x005D, 0x005B, 0x0027, 0 };
+    UChar replacement[8];
+
+    if (nustr == NULL)
+    {
+        return NULL;
+    }
+
+    u_uastrcpy(replacement, "AE");
+    nustr = u_strreplace(ae, replacement, nustr, u_strlen(nustr), 1);
+
+    u_uastrcpy(replacement, "TH");
+    nustr = u_strreplace(th, replacement, nustr, u_strlen(nustr), 1);
+
+    u_uastrcpy(replacement, "OE");
+    nustr = u_strreplace(oe, replacement, nustr, u_strlen(nustr), 1);
+
+    if (is_search)
+    {
+        /* Search keeps apostrophes: end the list just before U+0027. */
+        search_list[9] = 0;
+    }
+    /* The five replacements pair with the first five list entries. */
+    u_uastrcpy(replacement, "DDOLl");
+    nustr = u_strtransliterate(search_list, replacement, nustr, u_strlen(nustr));
+    return nustr;
+}
+
+/*
+ * Strip characters by Unicode category and mark the first comma.
+ *
+ * Removes every character in categories Cc, Cf, Co, Cs, Lm, Mc, Me and Mn.
+ * Then, if usf starts with 'a' and the first comma of the stripped string is
+ * not its last character, that comma is overwritten with U+0007.
+ *
+ * nustr must be a NUL-terminated, palloc'd string; it is consumed by
+ * u_strreplace() and the palloc'd result is returned.  usf may be NULL.
+ * Returns NULL if nustr is NULL.
+ */
+UChar *transformations_on_unicode(UChar *nustr, UChar *usf)
+{
+    UChar uregexp[200], replacement[1];
+    UChar *comma;
+
+    if (nustr == NULL)
+    {
+        return NULL;
+    }
+
+    /* u_uastrcpy() NUL-terminates; u_uastrncpy(..., strlen()) does not. */
+    u_uastrcpy(uregexp, "[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cs}\\p{Lm}\\p{Mc}\\p{Me}\\p{Mn}]");
+    replacement[0] = 0;
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
+
+    if (usf != NULL && usf[0] == 0x61) /* 0x61 == 'a' in UTF-16 */
+    {
+        comma = u_strchr(nustr, 0x2c); /* 0x2c == ',' in UTF-16 */
+
+        /* Measure after stripping: the pre-strip length would be stale. */
+        if (comma != NULL && comma != nustr + u_strlen(nustr) - 1)
+        {
+            *comma = 0x7;
+        }
+    }
+
+    return nustr;
+}
+
+/*
+ * Shield a few symbols with control-character placeholders, strip
+ * punctuation, symbols and separators, then restore the shielded symbols.
+ *
+ * '+', '&', '@', U+266D, U+266F and '#' become U+0001..U+0006 before the
+ * strip; afterwards U+0001..U+0007 are mapped back to those symbols and ','.
+ * nustr must be a NUL-terminated, palloc'd string; it is consumed and the
+ * palloc'd result is returned.  Returns NULL if nustr is NULL.
+ *
+ * NOTE(review): matches are deleted, as before; the PL/Perl original
+ * presumably substitutes a space here -- confirm.
+ */
+UChar *replace_placehoders(UChar *nustr)
+{
+    /* Code units, not "\x.." text: the lists are matched unit by unit. */
+    UChar shielded[] = { 0x2B, 0x26, 0x40, 0x266D, 0x266F, 0x23, 0 };
+    UChar restored[] = { 0x2B, 0x26, 0x40, 0x266D, 0x266F, 0x23, 0x2C, 0 };
+    UChar placeholders[] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0 };
+    UChar uregexp[200], replacement[1];
+
+    if (nustr == NULL)
+    {
+        return NULL;
+    }
+
+    /* shielded has six entries, so only U+0001..U+0006 are handed out. */
+    nustr = u_strtransliterate(shielded, placeholders, nustr, u_strlen(nustr));
+
+    u_uastrcpy(uregexp,
+               "[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}\\p{Sk}\\p{Sm}\\p{So}\\p{Zl}\\p{Zp}\\p{Zs}]");
+    replacement[0] = 0;
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
+
+    /* U+0007 stands for a comma; it is mapped back along with the others. */
+    nustr = u_strtransliterate(placeholders, restored, nustr, u_strlen(nustr));
+    return nustr;
+}
+
+/*
+ * Fold decimal digits from other scripts to ASCII '0'..'9', in place.
+ *
+ * Each zeros[] entry is a script's digit zero; the nine code points after it
+ * are taken as that script's digits one to nine.  nustr must be
+ * NUL-terminated; it is returned, or NULL if it is NULL.
+ */
+UChar *decimal_digits(UChar *nustr)
+{
+    static const UChar zeros[] = {
+        0x0660, 0x06F0, 0x07C0, 0x0966, 0x09E6, 0x0A66, 0x0AE6, 0x0B66,
+        0x0BE6, 0x0C66, 0x0CE6, 0x0D66, 0x0E50, 0x0ED0, 0x0F20, 0x1040,
+        0x1090, 0x17E0, 0x1810, 0x1946, 0x19D0, 0x1A80, 0x1A90, 0x1B50,
+        0x1BB0, 0x1C40, 0x1C50, 0xA620, 0xA8D0, 0xA900, 0xA9D0, 0xAA50,
+        0xABF0, 0xFF10
+    };
+    UChar *p;
+    int32_t i;
+
+    if (nustr == NULL)
+    {
+        return NULL;
+    }
+
+    for (p = nustr; *p != 0; p++)
+    {
+        for (i = 0; i < (int32_t) (sizeof(zeros) / sizeof(zeros[0])); i++)
+        {
+            if (*p >= zeros[i] && *p - zeros[i] <= 9)
+            {
+                *p = (UChar) (0x30 + (*p - zeros[i]));
+                break;
+            }
+        }
+    }
+    return nustr;
+}
+
+/*
+ * Collapse each whitespace run to one space, then trim both ends.
+ * nustr must be a NUL-terminated, palloc'd string; it is consumed by
+ * u_strreplace() and the palloc'd result is returned (NULL if nustr is NULL).
+ */
+UChar *leading_trailing_spaces(UChar * nustr)
+{
+    UChar uregexp[8], replacement[2];
+
+    if (nustr == NULL)
+    {
+        return NULL;
+    }
+
+    /* u_uastrcpy() NUL-terminates; u_uastrncpy(..., strlen()) does not. */
+    u_uastrcpy(uregexp, "\\s+");
+    u_uastrcpy(replacement, " ");
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
+    replacement[0] = 0;
+    u_uastrcpy(uregexp, "^\\s+");
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 0);
+    u_uastrcpy(uregexp, "\\s+$");
+    nustr = u_strreplace(uregexp, replacement, nustr, u_strlen(nustr), 1);
+    return nustr;
+}
+
+text *normalize(text *str, text *sf, int is_search)
+{
+    UChar *ustr, *nustr, *temp, *usf;
+    int32_t nustr_len, temp_len, str_len;
+    UNormalizer2 *normalizer;
+    char *regexp, *result;
+    UChar uregexp[200], replacement[200];
+    UErrorCode err;
+
+    if (str == NULL || sf == NULL)
+    {
+        return NULL;
+    }
+
+    normalizer = (UNormalizer2 *)unorm2_getNFKDInstance(&err);
+    if (U_FAILURE(err))
+    {
+        return NULL;
+    }
+
+    ustr = palloc(VARSIZE(str) * sizeof(UChar));
+    nustr = palloc(VARSIZE(str)* sizeof(UChar));
+    temp = palloc(VARSIZE(str) * sizeof(UChar));
+    usf = palloc(VARSIZE(sf) * sizeof(UChar));
+
+    temp = u_strFromUTF8(temp, VARSIZE(str), NULL, VARDATA(str), VARSIZE(str), &err);
+    if (U_FAILURE(err) || temp == NULL)
+    {
+        str = NULL;
+        goto Fail;
+    }
+
+    nustr = u_strncpy(nustr, temp, VARSIZE(str));
+    if (nustr == NULL)
+    {
+        str = NULL;
+        goto Fail;
+    }
+
+    usf = u_strFromUTF8(usf, VARSIZE(sf), NULL, VARDATA(sf), VARSIZE(sf), &err);
+    if (usf == NULL)
+    {
+        str = NULL;
+        goto Fail;
+    }
+    //Apply NACO normalization to input string; based on
+    //http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf
+    //
+    //Note that unlike a strict reading of the NACO normalization rules,
+    //output is returned as lowercase instead of uppercase for compatibility
+    //with previous versions of the Evergreen naco_normalize routine.
+    //
+    //Convert to upper-case first; even though final output will be lowercase, doing this will
+    //ensure that the German eszett (?) and certain ligatures (?, ?, ?, etc.) will be handled correctly.
+    //If there are any bugs in Perl's implementation of upcasing, they will be passed through here.
+
+    u_strToUpper(ustr, 
+                VARSIZE(str),
+                temp,
+                VARSIZE(str),
+                NULL,
+                &err);
+    pfree(temp);
+    temp = NULL;
+    if (U_FAILURE(err))
+    {
+        str = NULL;
+        goto Fail;
+    }
+
+    regexp = "\\x{0098}.*?\\x{009C}";
+    u_uastrncpy(uregexp, regexp, strlen(regexp));
+    u_uastrncpy(replacement, "", strlen(""));
+
+    ustr = u_strreplace(uregexp, replacement, ustr, u_strlen(ustr), 1);
+
+    unorm2_normalize(normalizer, ustr, VARSIZE(str), nustr, VARSIZE(str), &err);
+    if (U_FAILURE(err))
+    {
+        str = NULL;
+        goto Fail;
+    }
+
+    //additional substitutions - 3.6.
+    nustr = additional_substitutions(nustr, is_search);
+
+    //transformations based on Unicode category codes
+    nustr = transformations_on_unicode(nustr, usf);
+
+    //since we've stripped out the control characters, we can now
+    //use a few as placeholders temporarily
+    nustr = replace_placehoders(nustr);
+
+    //decimal digit
+    nustr = decimal_digits(nustr);
+
+    //intentionally skipping step 8 of the NACO algorithm; if the string
+    //gets normalized away, that's fine.
+
+    //leading and trailing spaces
+    nustr = leading_trailing_spaces(nustr);
+    nustr_len = u_strlen(nustr);
+
+    temp = palloc(nustr_len * sizeof(UChar));
+    u_strToLower(temp, nustr_len, nustr, nustr_len, NULL, &err);
+    temp_len = nustr_len;
+
+
+    u_strToUTF8(NULL, 0, &str_len, temp, temp_len, &err);
+
+    SET_VARSIZE(str, str_len + VARHDRSZ);
+    str = (text *)palloc(str_len + VARHDRSZ);
+    
+    result = u_strToUTF8(VARDATA(str), str_len, &str_len, temp, temp_len, &err); 
+
+    result[str_len] = '\0';
+
+    
+Fail:
+    pfree(temp);
+    pfree(ustr);
+    pfree(nustr);
+
+    return str;
+}
+
+PG_FUNCTION_INFO_V1(naco_normalize);
+
+/* SQL-callable wrapper: normalize(arg 0, arg 1) with is_search = 0. */
+Datum naco_normalize(PG_FUNCTION_ARGS)
+{
+    text *result = NULL;
+    /* A SQL NULL is signalled with PG_RETURN_NULL(), not a NULL pointer. */
+    if (!PG_ARGISNULL(0) && !PG_ARGISNULL(1))
+    {
+        result = normalize(PG_GETARG_TEXT_P(0), PG_GETARG_TEXT_P(1), 0);
+    }
+    if (result == NULL)
+    {
+        PG_RETURN_NULL();
+    }
+    PG_RETURN_TEXT_P(result);
+}
+
+PG_FUNCTION_INFO_V1(search_normalize);
+
+/* SQL-callable wrapper: normalize(arg 0, arg 1) with is_search = 1. */
+Datum search_normalize(PG_FUNCTION_ARGS)
+{
+    text *result = NULL;
+    /* A SQL NULL is signalled with PG_RETURN_NULL(), not a NULL pointer. */
+    if (!PG_ARGISNULL(0) && !PG_ARGISNULL(1))
+    {
+        result = normalize(PG_GETARG_TEXT_P(0), PG_GETARG_TEXT_P(1), 1);
+    }
+    if (result == NULL)
+    {
+        PG_RETURN_NULL();
+    }
+    PG_RETURN_TEXT_P(result);
+}
+
-- 
2.11.0