Final (?) tweaks for ebook processing
author: Dan Scott <dan@coffeecode.net>
Fri, 15 Jul 2011 21:20:27 +0000 (17:20 -0400)
committer: Dan Scott <dscott@laurentian.ca>
Mon, 12 Nov 2012 17:57:51 +0000 (12:57 -0500)
  * 506 punctuation
  * Skip generating 856s for Algoma in Cambridge records
  * Add the --platform option (although we might not use it)

Dan Scott <dscott@laurentian.ca>

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/ebooks/prep_ebook_records.py

index 2bf9d1c..67f924c 100644 (file)
@@ -93,7 +93,11 @@ Required arguments:
     -c / --consortium : The name of the consortial license to be inserted in
                         each 506$b access restriction note.
 
-    -p / --publisher : The name of the publisher to be inserted in a 710 field.
+    -p / --publisher : The name of the publisher to be inserted in a 710 field
+                       with a subfield 4 relator code 'pbl'.
+
+    -P / --platform: The name of the digital platform to be inserted in a 710
+                     field.
 
     -A / --algoma: Add an 856 for Algoma University
 
@@ -130,6 +134,8 @@ def consolidate_options(opts):
             _options['--consortium'] = val
         elif key == '-p':
             _options['--publisher'] = val
+        elif key == '-P':
+            _options['--platform'] = val
         elif key == '-n':
             _options['--note'] = val
         elif key == '-A':
@@ -207,6 +213,9 @@ def check_options(options):
     if '--note' in options:
         clean_opts['note'] = options['--note']
 
+    if '--platform' in options:
+        clean_opts['platform'] = options['--platform']
+
     clean_opts['libraries'] = _libraries
     clean_opts['input'] = _input
     clean_opts['output'] = _output
@@ -233,10 +242,10 @@ def check_libraries(options):
 def parse_opts():
     """Get command-line arguments from the script"""
     try:
-        _short_opts = 'i:o:a:c:p:ALWn:s:h'
+        _short_opts = 'i:o:a:c:p:ALWn:P:s:h'
         _long_opts = ['input=', 'output=', 'authorization=', 'consortium=', 
             'publisher=', 'algoma', 'laurentian', 'windsor', 'note=',
-            'sample=', 'help'
+            'platform=', 'sample=', 'help'
         ]
         opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) 
     except getopt.GetoptError, ex:
@@ -279,10 +288,29 @@ def process_fields(record, options):
 
     new_record = pymarc.Record(to_unicode=True, force_utf8=True)
 
+    add_cat_source(new_record, options) # 040
+    add_restriction(record, options) # 506
+
+    # 590
+    if 'note' in options:
+        note = pymarc.Field(tag = '590',
+            indicators = [' ', ' '],
+            subfields = [
+                'a', options['note']
+            ]
+        )
+        record.add_field(note)
+
+    add_marc_source(record, options) # 598
+    publisher = add_publisher(record, options) # 710
+    add_platform(record, options) # 710
+
+    marked_isbn = mark_isbn_for_sfx(record, options)
+
     for field in record.get_fields():
         # Process all of the 856 fields
         if field.tag == '856':
-            new_fields = process_urls(field, options)
+            new_fields = process_urls(field, options, publisher)
             if new_fields:
                 for new_856 in new_fields:
                     new_record.add_field(new_856)
@@ -295,42 +323,40 @@ def process_fields(record, options):
         else:
             new_record.add_field(field)
 
-    add_publisher(record, new_record, options)
-    add_restriction(new_record, options)
-    marked_isbn = mark_isbn_for_sfx(new_record, options)
     if not marked_isbn:
         print("No matching ISBN target found in SFX for %s" %
             (new_record['856']['u'])
         )
 
-    if 'note' in options:
-        note = pymarc.Field(tag = '590',
-            indicators = [' ', ' '],
-            subfields = [
-                'a', options['note']
-            ]
-        )
-        new_record.add_field(note)
-
-    add_cat_source(new_record, options)
-    add_marc_source(new_record, options)
-
     return new_record
 
-def add_publisher(record, new_record, options):
+def add_publisher(record, options):
     """
     This is a convoluted way to avoid creating a new 710 if we already
     have a matching 710 and just need to add the publisher relator code.
     """
 
+    publisher = options['publisher']
     munge_publisher = False
     need_publisher = True
     need_relator = True
 
+    raw_publisher = None
+    try:
+        raw_publisher = record['260']['b']
+    except:
+        pass
+
+    if raw_publisher:
+        if 'Oxford' in raw_publisher or 'Clarendon' in raw_publisher:
+            publisher = 'Oxford University Press'
+        elif 'Cambridge' in raw_publisher:
+            publisher = 'Cambridge University Press'
+
     # Iterate through all of the existing 710 fields
     for sten in record.get_fields('710'):
         for pub in sten.get_subfields('a'):
-            if pub == options['publisher']:
+            if pub == publisher:
                 munge_publisher = True
                 for rel in sten.get_subfields('4'): 
                     if rel == 'pbl':
@@ -346,11 +372,38 @@ def add_publisher(record, new_record, options):
         seven_ten = pymarc.Field(tag = '710',
             indicators = ['2', ' '],
             subfields = [
-                'a', options['publisher'],
+                'a', publisher,
                 '4', 'pbl'
             ]
         )
-        new_record.add_field(seven_ten)
+        record.add_field(seven_ten)
+
+    return publisher
+
+def add_platform(record, options):
+    """
+    This is a convoluted way to avoid creating a new 710 if we already
+    have a matching 710 for digital platform.
+    """
+
+    platform = options['platform']
+    need_platform = True
+
+    # Iterate through all of the existing 710 fields
+    for sten in record.get_fields('710'):
+        for pub in sten.get_subfields('a'):
+            if pub == platform:
+                need_platform = False
+
+    if need_platform:
+        # Add the platform
+        seven_ten = pymarc.Field(tag = '710',
+            indicators = ['2', ' '],
+            subfields = [
+                'a', platform
+            ]
+        )
+        record.add_field(seven_ten)
 
 def mark_isbn_for_sfx(record, options):
     """
@@ -440,8 +493,8 @@ def add_restriction(new_record, options):
             indicators = ['1', ' '],
             subfields = [
                 'a', libopts['access_note'],
-                'b', options['consortium'],
-                'e', options['authorization'],
+                'b', options['consortium'] + ' ; ',
+                'e', options['authorization'] + '.',
                 '9', libopts['code']
             ]
         )
@@ -482,7 +535,7 @@ def add_marc_source(record, options):
     )
     record.add_field(marc_source)
 
-def process_urls(field, options):
+def process_urls(field, options, publisher):
     """Creates 856 fields required by Conifer"""
 
     new_fields = []
@@ -498,6 +551,10 @@ def process_urls(field, options):
             new_fields.append(enrich)
     else:
         for lib in options['libraries']:
+
+            # Tweak for Algoma for combined CUP/OUP
+            if lib == 'algoma' and 'Cambridge' in publisher:
+                continue
             data = options['settings'].get_settings(lib)
             subs = get_subfields(field, data)
             eight_five_six = pymarc.Field(tag = '856',