Add more carons, based on outliers from CRKN_OUP
author Dan Scott <dan@coffeecode.net>
Tue, 2 Aug 2011 20:38:03 +0000 (16:38 -0400)
committer Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:37:10 +0000 (14:37 -0400)
Note that in at least one case, this would result in MÚSICA becoming
MŠICA. Sigh. Finally the conflicts arrive.

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/ebooks/prep_ebook_records.py

index ef39467..2fb4859 100644 (file)
@@ -392,10 +392,22 @@ def clean_diacritics(field):
         tmpsf = tmpsf.replace(u'\xd4S', u'\u015a')
         tmpsf = tmpsf.replace(u'\xd4s', u'\u015b')
 
+        # A CARON -- NOTE(review): U+0100/U+0101 are A WITH MACRON; A WITH CARON is U+01CD/U+01CE -- confirm
+        tmpsf = tmpsf.replace(u'\xdaA', u'\u0100')
+        tmpsf = tmpsf.replace(u'\xdaa', u'\u0101')
+
+        # C CARON
+        tmpsf = tmpsf.replace(u'\xdaC', u'\u010c')
+        tmpsf = tmpsf.replace(u'\xdac', u'\u010d')
+
         # R CARON
         tmpsf = tmpsf.replace(u'\xdaR', u'\u0158')
         tmpsf = tmpsf.replace(u'\xdar', u'\u0159')
 
+        # S CARON
+        tmpsf = tmpsf.replace(u'\xdaS', u'\u0160')
+        tmpsf = tmpsf.replace(u'\xdas', u'\u0161')
+
         # G BREVE
         tmpsf = tmpsf.replace(u'\xe6G', u'\u011e')
         tmpsf = tmpsf.replace(u'\xe6g', u'\u011f')