From 64df62ee28320f7df4c013bbf9ca65a99e68a748 Mon Sep 17 00:00:00 2001
From: Dan Scott <dan@coffeecode.net>
Date: Fri, 29 Jul 2011 13:21:30 -0400
Subject: [PATCH] Iterating through CRKN_OUP.mrc for more corruption to fix

Tildes, acute accents, cedillas, we got em all.

Signed-off-by: Dan Scott <dscott@laurentian.ca>
---
 tools/ebooks/prep_ebook_records.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py
index c49e0dab9b..f81554c2ff 100644
--- a/tools/ebooks/prep_ebook_records.py
+++ b/tools/ebooks/prep_ebook_records.py
@@ -361,15 +361,31 @@ def clean_diacritics(field):
             continue
 
         # Let the substitutions commence - maybe move to a map table?
+
+        # COMBINING MACRON
         tmpsf = subfield[1].replace(u'\xd5a', u'a\u0304')
+        tmpsf = tmpsf.replace(u'\xd5e', u'e\u0304')
         tmpsf = tmpsf.replace(u'\xd5i', u'i\u0304')
+        tmpsf = tmpsf.replace(u'\xd5o', u'o\u0304')
+        tmpsf = tmpsf.replace(u'\xd5u', u'u\u0304')
+
+        # MODIFIER LETTER RIGHT HALF RING (U+02BE; a spacing modifier, not a combining mark)
         tmpsf = tmpsf.replace(u'i\xb1', u'i\u02be')
 
+        # COMBINING TILDE
+        tmpsf = tmpsf.replace(u'\xf5n', u'n\u0303')
+
+        # COMBINING CEDILLA
+        tmpsf = tmpsf.replace(u'\xb0c', u'c\u0327')
+
+        # COMBINING ACUTE ACCENT
+        tmpsf = tmpsf.replace(u'\xd4s', u's\u0301')
+
         new_field.add_subfield(subfield[0], tmpsf)
         if r'\x' in repr(tmpsf):
             global RECORD_COUNT
             print " * %d Hex value found in %s:%s - [%s] [%s]" % (
-                RECORD_COUNT, field.tag, subfield[0], tmpsf, repr(tmpsf)
+                RECORD_COUNT, field.tag, subfield[0], tmpsf.encode('utf8'), repr(tmpsf)
             )
 
     return new_field
-- 
2.11.0