    marked_isbn = mark_isbn_for_sfx(record, options)
    for field in record.get_fields():
+        field = clean_diacritics(field)
        # Process all of the 856 fields
        if field.tag == '856':
            new_fields = process_urls(field, options, publisher)
    return new_record
+def clean_diacritics(field):
+    """
+    Change specific patterns of bytes into other patterns of bytes.
+
+    We get some horribly corrupted records. This is an attempt to reverse
+    the horror via equally horrible byte-matching for known messed-up
+    conditions.
+    """
+
+    # Control fields carry no subfields, so there is nothing to clean
+    if field.is_control_field():
+        return field
+
+    new_field = pymarc.Field(
+        tag=field.tag,
+        indicators=[field.indicator1, field.indicator2]
+    )
+
+    # Iterating a pymarc Field yields (code, value) subfield tuples
+    for subfield in field:
+        # Pass through values with no stray high bytes untouched
+        if r'\x' not in repr(subfield[1]):
+            new_field.add_subfield(subfield[0], subfield[1])
+            continue
+
+        # Let the substitutions commence - maybe move to a map table?
+        tmpsf = subfield[1].replace(u'\xd5a', u'a\u0304')
+        tmpsf = tmpsf.replace(u'\xd5i', u'i\u0304')
+        tmpsf = tmpsf.replace(u'i\xb1', u'i\u02be')
+
+        new_field.add_subfield(subfield[0], tmpsf)
+        # Warn about hex values we do not yet have a substitution for
+        if r'\x' in repr(tmpsf):
+            print " * %d Hex value found in %s:%s - [%s] [%s]" % (
+                RECORD_COUNT, field.tag, subfield[0], tmpsf, repr(tmpsf)
+            )
+
+    return new_field
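+
+# One possible shape for the "map table" floated in the comment above:
+# keep the known corruption pairs as data and loop over them. The names
+# DIACRITIC_MAP and apply_diacritic_map are illustrative assumptions,
+# not existing code; the pairs are the same three substitutions used above.
+DIACRITIC_MAP = [
+    (u'\xd5a', u'a\u0304'),  # -> a + combining macron (U+0304)
+    (u'\xd5i', u'i\u0304'),  # -> i + combining macron (U+0304)
+    (u'i\xb1', u'i\u02be'),  # -> i + modifier letter right half ring (U+02BE)
+]
+
+def apply_diacritic_map(value):
+    """Apply every known byte-pattern substitution to a subfield value."""
+    for bad, good in DIACRITIC_MAP:
+        value = value.replace(bad, good)
+    return value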
+
+
def add_publisher(record, options):
"""
This is a convoluted way to avoid creating a new 710 if we already