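Convert uniformly to Unicode output

Read the input MARC records with to_unicode=True, create new records with to_unicode=True and force_utf8=True, decode the cleaned-up LoC content from ISO 8859-1, and report per-record processing errors instead of letting one bad record stop the run.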

author dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>

Thu, 16 Dec 2010 16:42:24 +0000 (16:42 +0000)

committer dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>

Thu, 16 Dec 2010 16:42:24 +0000 (16:42 +0000)
author dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Thu, 16 Dec 2010 16:42:24 +0000 (16:42 +0000)
committer dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Thu, 16 Dec 2010 16:42:24 +0000 (16:42 +0000)
diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py

index 4b20b20..f1177f2 100644 (file)
--- a/tools/ebooks/prep_ebook_records.py
+++ b/tools/ebooks/prep_ebook_records.py
@@ -220,29 +220,34 @@ def process_records(options):
      """Converts raw ebook MARC records to Conifer-ready MARC records"""
  
      sample = ''
-    reader = pymarc.MARCReader(open(options['input'], 'rb'))
-    writer = pymarc.MARCWriter(open(options['output'], 'wb'))
+    reader = pymarc.MARCReader(
+        open(options['input'], mode='rb'), to_unicode=True
+    )
+    writer = pymarc.MARCWriter(open(options['output'], mode='wb'))
      if ('sample' in options):
-        sample = pymarc.MARCWriter(open(options['sample'], 'wb'))
+        sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
  
      cnt = 0
      for record in reader:
          cnt = cnt + 1
-        if not (record['856'] and record['856']['u']):
-            print("* No 856 for record # %s in file %s"
-                    % (cnt, options['input'])
-            )
+        try:
+            if not (record['856'] and record['856']['u']):
+                print("* No 856 for record # %s in file %s"
+                        % (cnt, options['input'])
+                )
  
-        new_record = process_fields(record, options)
+            new_record = process_fields(record, options)
  
-        writer.write(new_record)
-        if (sample and ((cnt == 1) or (cnt % 100 == 0))):
-            sample.write(new_record)
+            writer.write(new_record)
+            if (sample and ((cnt == 1) or (cnt % 100 == 0))):
+                sample.write(new_record)
+        except Exception, ex:
+            print("* Error processing record %s - %s" % (cnt, ex))
  
  def process_fields(record, options):
      """Decide which fields to add, delete, and keep"""
  
-    new_record = pymarc.Record()
+    new_record = pymarc.Record(to_unicode=True, force_utf8=True)
  
      for field in record.get_fields():
          # Process all of the 856 fields
@@ -435,7 +440,7 @@ def process_loc_data(raw_content):
          content = content[0:lcsh]
  
      # Farewell, starting and ending whitespace
-    content = content.strip()
+    content = content.strip().decode('iso8859-1')
  
      return content
author	dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
	Thu, 16 Dec 2010 16:42:24 +0000 (16:42 +0000)
committer	dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
	Thu, 16 Dec 2010 16:42:24 +0000 (16:42 +0000)