From 0b2784f4f631ee9869f81b06e7ceeb28703a2bd4 Mon Sep 17 00:00:00 2001 From: Dan Scott Date: Tue, 2 Aug 2011 15:44:36 -0400 Subject: [PATCH] Correct I BREVE for Mr. Sakharov We were adding an extra i to the composed I BREVE chars. Also log diffs so we can eyeball the changes and know that they are good. Signed-off-by: Dan Scott --- tools/ebooks/prep_ebook_records.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py index 43d756e3b9..ef39467190 100644 --- a/tools/ebooks/prep_ebook_records.py +++ b/tools/ebooks/prep_ebook_records.py @@ -401,8 +401,8 @@ def clean_diacritics(field): tmpsf = tmpsf.replace(u'\xe6g', u'\u011f') # I BREVE - tmpsf = tmpsf.replace(u'\xe6I', u'i\u012c') - tmpsf = tmpsf.replace(u'\xe6i', u'i\u012d') + tmpsf = tmpsf.replace(u'\xe6I', u'\u012c') + tmpsf = tmpsf.replace(u'\xe6i', u'\u012d') # COMBINING DOT ABOVE tmpsf = tmpsf.replace(u'\xfeI', u'I\u0307') @@ -419,12 +419,17 @@ def clean_diacritics(field): tmpsf = tmpsf.replace(u'\xf0', u'\u02b9') new_field.add_subfield(subfield[0], tmpsf) + global RECORD_COUNT if r'\x' in repr(tmpsf): - global RECORD_COUNT print " * %d Hex value found in %s:%s - [%s] [%s]" % ( RECORD_COUNT, field.tag, subfield[0], tmpsf.encode('utf8'), repr(tmpsf) ) + if (repr(subfield[1]) != repr(tmpsf)): + print "* %d\tOld: [%s]\tNew: [%s]" % ( + RECORD_COUNT, subfield[1].encode('utf8'), tmpsf.encode('utf8') + ) + return new_field -- 2.11.0