More search and replace operations
author Dan Scott <dan@coffeecode.net>
Fri, 29 Jul 2011 19:49:10 +0000 (15:49 -0400)
committer Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:37:04 +0000 (14:37 -0400)
S cedilla, combining ligatures, modifier letter prime

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/ebooks/prep_ebook_records.py

index e6fed11..5c0af4d 100644 (file)
@@ -377,6 +377,7 @@ def clean_diacritics(field):
 
         # COMBINING CEDILLA
         tmpsf = tmpsf.replace(u'\xb0c', u'c\u0327')
+        tmpsf = tmpsf.replace(u'\u01afS', u'S\u0327')
 
         # S WITH COMBINING ACUTE ACCENT
         tmpsf = tmpsf.replace(u'\xd4S', u'\u015a')
@@ -397,6 +398,17 @@ def clean_diacritics(field):
         # COMBINING DOT ABOVE
         tmpsf = tmpsf.replace(u'\xfeI', u'I\u0307')
 
+        # COMBINING LIGATURE LEFT HALF
+        tmpsf = tmpsf.replace(u'\xd9i', u'i\ufe20')
+        tmpsf = tmpsf.replace(u'\xd9t', u't\ufe20')
+
+        # COMBINING LIGATURE RIGHT HALF
+        tmpsf = tmpsf.replace(u'\xfda', u'a\ufe21')
+        tmpsf = tmpsf.replace(u'\xfds', u's\ufe21')
+
+        # MODIFIER LETTER PRIME
+        tmpsf = tmpsf.replace(u'\xf0', u'\u02b9')
+
         new_field.add_subfield(subfield[0], tmpsf)
         if r'\x' in repr(tmpsf):
             global RECORD_COUNT