Iterating through CRKN_OUP.mrc for more corruption to fix

author Dan Scott <dan@coffeecode.net>

Fri, 29 Jul 2011 17:21:30 +0000 (13:21 -0400)

committer Dan Scott <dscott@laurentian.ca>

Tue, 7 May 2013 18:36:45 +0000 (14:36 -0400)
author Dan Scott <dan@coffeecode.net>
Fri, 29 Jul 2011 17:21:30 +0000 (13:21 -0400)
committer Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:36:45 +0000 (14:36 -0400)
diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py

index c49e0da..f81554c 100644 (file)
--- a/tools/ebooks/prep_ebook_records.py
+++ b/tools/ebooks/prep_ebook_records.py
@@ -361,15 +361,31 @@ def clean_diacritics(field):
              continue
  
          # Let the substitutions commence - maybe move to a map table?
+
+        # COMBINING MACRON
          tmpsf = subfield[1].replace(u'\xd5a', u'a\u0304')
+        tmpsf = tmpsf.replace(u'\xd5e', u'e\u0304')
          tmpsf = tmpsf.replace(u'\xd5i', u'i\u0304')
+        tmpsf = tmpsf.replace(u'\xd5o', u'o\u0304')
+        tmpsf = tmpsf.replace(u'\xd5u', u'u\u0304')
+
+        # MODIFIER LETTER RIGHT HALF RING (U+02BE; a spacing letter, not a combining mark)
          tmpsf = tmpsf.replace(u'i\xb1', u'i\u02be')
  
+        # COMBINING TILDE
+        tmpsf = tmpsf.replace(u'\xf5n', u'n\u0303')
+
+        # COMBINING CEDILLA
+        tmpsf = tmpsf.replace(u'\xb0c', u'c\u0327')
+
+        # COMBINING ACUTE ACCENT
+        tmpsf = tmpsf.replace(u'\xd4s', u's\u0301')
+
          new_field.add_subfield(subfield[0], tmpsf)
          if r'\x' in repr(tmpsf):
              global RECORD_COUNT
              print " * %d Hex value found in %s:%s - [%s] [%s]" % (
-                RECORD_COUNT, field.tag, subfield[0], tmpsf, repr(tmpsf)
+                RECORD_COUNT, field.tag, subfield[0], tmpsf.encode('utf8'), repr(tmpsf)
              )
  
      return new_field
author	Dan Scott <dan@coffeecode.net>
	Fri, 29 Jul 2011 17:21:30 +0000 (13:21 -0400)
committer	Dan Scott <dscott@laurentian.ca>
	Tue, 7 May 2013 18:36:45 +0000 (14:36 -0400)