-- in-db indexing normalizers
+-- Seed the Unicode transliteration normalizer. The func value has to be spelled
+-- 'evergreen.unidecode' exactly, because the field/normalizer mapping later in
+-- this file selects normalizers by that func name.
INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
+    'Unicode Transliteration',
+    'Transliterate non-ASCII Unicode code points to ASCII text',
+    'evergreen.unidecode',
+    0
+);
+
+INSERT INTO config.index_normalizer (name, description, func, param_count) VALUES (
'NACO Normalize',
'Apply NACO normalization rules to the extracted text. See http://www.loc.gov/catdir/pcc/naco/normrule-2.html for details.',
'naco_normalize',
i.id
FROM config.metabib_field m,
config.index_normalizer i
- WHERE i.func IN ('naco_normalize','split_date_range')
+ WHERE i.func IN ('evergreen.unidecode','naco_normalize','split_date_range')
+ AND m.search_field
AND m.id NOT IN (18, 19);
INSERT INTO config.metabib_field_index_norm_map (field,norm,pos)
--- /dev/null
+/*
+ * First, install the required Perl module as root:
+ *
+ * # cpan Text::Unidecode
+ *
+ * and after that is successfully installed, apply the following SQL script.
+ *
+ */
+
+BEGIN; -- opens the upgrade transaction; the matching COMMIT is further down
+-- NOTE(review): 'XXXX' looks like a placeholder id to replace before release, and :eg_version is a psql variable the caller must set -- confirm both
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); -- presumably a dependency/version guard; its semantics are not visible here
+
+-- evergreen.unidecode(TEXT) -> TEXT
+-- Hands its single argument to unidecode() from the Perl module Text::Unidecode
+-- and returns the result. Written in untrusted PL/Perl (PLPERLU) and declared
+-- IMMUTABLE.
+CREATE OR REPLACE FUNCTION evergreen.unidecode (TEXT)
+RETURNS TEXT
+LANGUAGE PLPERLU
+IMMUTABLE
+AS $func$
+    use Text::Unidecode;
+    my $input = shift;
+    return unidecode($input);
+$func$;
+
+-- Register evergreen.unidecode as an index normalizer.
+-- The func value must match the function's schema-qualified name exactly: the
+-- field/normalizer mapping that follows finds this row with
+-- func = 'evergreen.unidecode', so a misspelling here leaves that INSERT with
+-- nothing to map.
+INSERT INTO config.index_normalizer (
+    name,
+    description,
+    func,
+    param_count
+) VALUES (
+    'Unicode Transliteration',
+    'Transliterate non-ASCII Unicode code points to ASCII text',
+    'evergreen.unidecode',
+    0
+);
+
+-- Map the unidecode normalizer onto every field flagged search_field,
+-- skipping field ids 18 and 19. The two tables are deliberately cross-joined:
+-- each qualifying field is paired with each normalizer row whose func matches.
+INSERT INTO config.metabib_field_index_norm_map (field, norm)
+    SELECT  fld.id,
+            nrm.id
+      FROM  config.metabib_field AS fld
+            CROSS JOIN config.index_normalizer AS nrm
+      WHERE nrm.func = 'evergreen.unidecode'
+            AND fld.search_field
+            AND fld.id NOT IN (18, 19);
+
+-- Rewrite every entry that belongs to a search field, in each of the six
+-- metabib.*_field_entry tables. The assignment is a no-op (value = value);
+-- presumably the point is to fire the tables' indexing triggers so the new
+-- normalizer is applied -- confirm against the trigger definitions.
+-- This touches every matching row, so it can take a long time on large datasets.
+UPDATE  metabib.title_field_entry AS entry
+   SET  value = entry.value
+ WHERE  EXISTS (
+            SELECT 1
+              FROM config.metabib_field AS fld
+             WHERE fld.id = entry.field
+               AND fld.search_field
+        );
+
+UPDATE  metabib.author_field_entry AS entry
+   SET  value = entry.value
+ WHERE  EXISTS (
+            SELECT 1
+              FROM config.metabib_field AS fld
+             WHERE fld.id = entry.field
+               AND fld.search_field
+        );
+
+UPDATE  metabib.subject_field_entry AS entry
+   SET  value = entry.value
+ WHERE  EXISTS (
+            SELECT 1
+              FROM config.metabib_field AS fld
+             WHERE fld.id = entry.field
+               AND fld.search_field
+        );
+
+UPDATE  metabib.keyword_field_entry AS entry
+   SET  value = entry.value
+ WHERE  EXISTS (
+            SELECT 1
+              FROM config.metabib_field AS fld
+             WHERE fld.id = entry.field
+               AND fld.search_field
+        );
+
+UPDATE  metabib.series_field_entry AS entry
+   SET  value = entry.value
+ WHERE  EXISTS (
+            SELECT 1
+              FROM config.metabib_field AS fld
+             WHERE fld.id = entry.field
+               AND fld.search_field
+        );
+
+UPDATE  metabib.identifier_field_entry AS entry
+   SET  value = entry.value
+ WHERE  EXISTS (
+            SELECT 1
+              FROM config.metabib_field AS fld
+             WHERE fld.id = entry.field
+               AND fld.search_field
+        );
+
+-- Close the upgrade transaction BEFORE vacuuming. PostgreSQL refuses to run
+-- VACUUM inside a transaction block, so if COMMIT came after these statements
+-- the first VACUUM would raise an error and abort the whole upgrade.
+COMMIT;
+
+-- Clean up a bit after ourselves: the re-index rewrote every search-field row
+-- in these tables. Keywords follow the order given in the PostgreSQL VACUUM
+-- synopsis (FULL, VERBOSE, ANALYZE).
+VACUUM FULL VERBOSE ANALYZE metabib.title_field_entry;
+VACUUM FULL VERBOSE ANALYZE metabib.author_field_entry;
+VACUUM FULL VERBOSE ANALYZE metabib.subject_field_entry;
+VACUUM FULL VERBOSE ANALYZE metabib.keyword_field_entry;
+VACUUM FULL VERBOSE ANALYZE metabib.series_field_entry;
+VACUUM FULL VERBOSE ANALYZE metabib.identifier_field_entry;
+