Revert inadvertent damage caused by last commit
author Dan Scott <dan@coffeecode.net>
Thu, 28 Jul 2011 15:53:27 +0000 (11:53 -0400)
committer Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:36:35 +0000 (14:36 -0400)
Two different copies of the Conifer repo = damage!

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/ebooks/prep_ebook_records.py

index e753c9c..cefe047 100644 (file)
@@ -14,8 +14,11 @@ be accommodated in batch load.
 """
 
 import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, urllib2
+from datetime import date
 from BeautifulSoup import BeautifulSoup
 
+RECORD_COUNT = 0
+
 class Institution():
     """Defines standard settings for each Conifer institution"""
 
@@ -27,7 +30,7 @@ class Institution():
             "proxy": "http://libproxy.auc.ca/login?url=", \
             "link_text": "Available online", \
             "sfx_url": "http://sfx.scholarsportal.info/algoma", \
-            "access_note": "Access restricted to users with a valid Algoma University ID" \
+            "access_note": "Access restricted to users with a valid Algoma University ID ;" \
         }
         
         self.laurentian = { \
@@ -36,7 +39,7 @@ class Institution():
             "proxy": "https://librweb.laurentian.ca/login?url=", \
             "link_text": "Available online / disponible en ligne", \
             "sfx_url": "http://sfx.scholarsportal.info/laurentian", \
-            "access_note": "Access restricted to users with a valid Laurentian University ID" \
+            "access_note": "Access restricted to users with a valid Laurentian University ID ;" \
         }
 
         self.windsor = { \
@@ -45,7 +48,7 @@ class Institution():
             "proxy": "http://ezproxy.uwindsor.ca/login?url=", \
             "link_text": "Available online", \
             "sfx_url": "http://sfx.scholarsportal.info/windsor", \
-            "access_note": "Access restricted to users with a valid University of Windsor ID" \
+            "access_note": "Access restricted to users with a valid University of Windsor ID ;" \
         }
 
     def get_settings(self, lib):
@@ -92,8 +95,12 @@ Required arguments:
     -c / --consortium : The name of the consortial license to be inserted in
                         each 506$b access restriction note.
 
-    -p / --publisher : The name of the publisher to be inserted in a 710 field.
+    -p / --publisher : The name of the publisher to be inserted in a 710 field
+                       with a subfield 4 relator code 'pbl'.
 
+    -P / --platform: The name of the digital platform to be inserted in a 710
+                     field.
     -A / --algoma: Add an 856 for Algoma University
 
     -L / --laurentian: Add an 856 for Laurentian University
@@ -129,6 +136,8 @@ def consolidate_options(opts):
             _options['--consortium'] = val
         elif key == '-p':
             _options['--publisher'] = val
+        elif key == '-P':
+            _options['--platform'] = val
         elif key == '-n':
             _options['--note'] = val
         elif key == '-A':
@@ -206,6 +215,9 @@ def check_options(options):
     if '--note' in options:
         clean_opts['note'] = options['--note']
 
+    if '--platform' in options:
+        clean_opts['platform'] = options['--platform']
+
     clean_opts['libraries'] = _libraries
     clean_opts['input'] = _input
     clean_opts['output'] = _output
@@ -232,10 +244,10 @@ def check_libraries(options):
 def parse_opts():
     """Get command-line arguments from the script"""
     try:
-        _short_opts = 'i:o:a:c:p:ALWn:s:h'
+        _short_opts = 'i:o:a:c:p:ALWn:P:s:h'
         _long_opts = ['input=', 'output=', 'authorization=', 'consortium=', 
             'publisher=', 'algoma', 'laurentian', 'windsor', 'note=',
-            'sample=', 'help'
+            'platform=', 'sample=', 'help'
         ]
         opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) 
     except getopt.GetoptError, ex:
@@ -248,6 +260,7 @@ def parse_opts():
 def process_records(options):
     """Converts raw ebook MARC records to Conifer-ready MARC records"""
 
+    global RECORD_COUNT
     sample = ''
     reader = pymarc.MARCReader(
         open(options['input'], mode='rb'), to_unicode=True
@@ -256,32 +269,50 @@ def process_records(options):
     if ('sample' in options):
         sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
 
-    cnt = 0
     for record in reader:
-        cnt = cnt + 1
+        RECORD_COUNT += 1
         try:
             if not (record['856'] and record['856']['u']):
                 print("* No 856 for record # %s in file %s"
-                        % (cnt, options['input'])
+                        % (RECORD_COUNT, options['input'])
                 )
 
             new_record = process_fields(record, options)
 
             writer.write(new_record)
-            if (sample and ((cnt == 1) or (cnt % 100 == 0))):
+            if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))):
                 sample.write(new_record)
         except Exception, ex:
-            print("* Error processing record %s - %s" % (cnt, ex))
+            print("* Error processing record %d - %s" % (RECORD_COUNT, ex))
 
 def process_fields(record, options):
     """Decide which fields to add, delete, and keep"""
 
     new_record = pymarc.Record(to_unicode=True, force_utf8=True)
 
+    add_cat_source(new_record, options) # 040
+    add_restriction(record, options) # 506
+
+    # 590
+    if 'note' in options:
+        note = pymarc.Field(tag = '590',
+            indicators = [' ', ' '],
+            subfields = [
+                'a', options['note']
+            ]
+        )
+        record.add_field(note)
+
+    add_marc_source(record, options) # 598
+    publisher = add_publisher(record, options) # 710
+    add_platform(record, options) # 710
+
+    marked_isbn = mark_isbn_for_sfx(record, options)
+
     for field in record.get_fields():
         # Process all of the 856 fields
         if field.tag == '856':
-            new_fields = process_urls(field, options)
+            new_fields = process_urls(field, options, publisher)
             if new_fields:
                 for new_856 in new_fields:
                     new_record.add_field(new_856)
@@ -294,41 +325,46 @@ def process_fields(record, options):
         else:
             new_record.add_field(field)
 
-    add_publisher(record, new_record, options)
-    add_restriction(new_record, options)
-    marked_isbn = mark_isbn_for_sfx(new_record, options)
     if not marked_isbn:
-        print("No matching ISBN target found in SFX for %s" %
-            (new_record['856']['u'])
-        )
-
-    if 'note' in options:
-        note = pymarc.Field(tag = '590',
-            indicators = [' ', ' '],
-            subfields = [
-                'a', options['note']
-            ]
-        )
-        new_record.add_field(note)
-
-    add_cat_source(new_record, options)
+        try:
+            isbn = record['020']['a']
+            print("ISBN: [%s] - no matching ISBN target found in SFX for %s" %
+                (isbn, new_record['856']['u'])
+            )
+        except:
+            print("No matching ISBN target found in SFX for %s" %
+                (new_record['856']['u'])
+            )
 
     return new_record
 
-def add_publisher(record, new_record, options):
+def add_publisher(record, options):
     """
     This is a convoluted way to avoid creating a new 710 if we already
     have a matching 710 and just need to add the publisher relator code.
     """
 
+    publisher = options['publisher']
     munge_publisher = False
     need_publisher = True
     need_relator = True
 
+    raw_publisher = None
+    try:
+        raw_publisher = record['260']['b']
+    except:
+        pass
+
+    if raw_publisher:
+        if 'Oxford' in raw_publisher or 'Clarendon' in raw_publisher:
+            publisher = 'Oxford University Press'
+        elif 'Cambridge' in raw_publisher:
+            publisher = 'Cambridge University Press'
+
     # Iterate through all of the existing 710 fields
     for sten in record.get_fields('710'):
         for pub in sten.get_subfields('a'):
-            if pub == options['publisher']:
+            if pub == publisher:
                 munge_publisher = True
                 for rel in sten.get_subfields('4'): 
                     if rel == 'pbl':
@@ -344,12 +380,39 @@ def add_publisher(record, new_record, options):
         seven_ten = pymarc.Field(tag = '710',
             indicators = ['2', ' '],
             subfields = [
-                'a', options['publisher'],
+                'a', publisher,
                 '4', 'pbl'
             ]
         )
-        new_record.add_field(seven_ten)
+        record.add_field(seven_ten)
+
+    return publisher
+
+def add_platform(record, options):
+    """
+    This is a convoluted way to avoid creating a new 710 if we already
+    have a matching 710 for digital platform.
+    """
+
+    platform = options['platform']
+    need_platform = True
 
+    # Iterate through all of the existing 710 fields
+    for sten in record.get_fields('710'):
+        for pub in sten.get_subfields('a'):
+            if pub == platform:
+                need_platform = False
+
+    if need_platform:
+        # Add the platform
+        seven_ten = pymarc.Field(tag = '710',
+            indicators = ['2', ' '],
+            subfields = [
+                'a', platform
+            ]
+        )
+        record.add_field(seven_ten)
 def mark_isbn_for_sfx(record, options):
     """
     Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups
@@ -441,6 +504,11 @@ def add_restriction(new_record, options):
       * $9 - Institutional code to which this note applies
     """
 
+    # Add a period if the authorization ends with a number or letter
+    authnote = options['authorization']
+    if authnote[-1] not in '.)]':
+        authnote += '.'
+
     for library in options['libraries']:
         libopts = options['settings'].get_settings(library)
         # Add the access restriction note
@@ -448,8 +516,8 @@ def add_restriction(new_record, options):
             indicators = ['1', ' '],
             subfields = [
                 'a', libopts['access_note'],
-                'b', options['consortium'],
-                'e', options['authorization'],
+                'b', options['consortium'] + ' ; ',
+                'e', authnote,
                 '9', libopts['code']
             ]
         )
@@ -474,8 +542,26 @@ def add_cat_source(record, options):
         )
         record.add_field(forty)
 
+def add_marc_source(record, options):
+    """
+    Add a 598 field identifying the source MARC file name and processing date
+    """
 
-def process_urls(field, options):
+    global RECORD_COUNT
+    source = os.path.basename(options['input'])
+
+    marc_source = pymarc.Field(tag = '598',
+        indicators = [' ', ' '],
+        subfields = [
+            'a', source,
+            'b', date.today().isoformat(),
+            'c', str(RECORD_COUNT)
+        ]
+    )
+    record.add_field(marc_source)
+
+def process_urls(field, options, publisher):
     """Creates 856 fields required by Conifer"""
 
     new_fields = []
@@ -491,6 +577,11 @@ def process_urls(field, options):
             new_fields.append(enrich)
     else:
         for lib in options['libraries']:
+
+            # Tweak for Algoma for combined CUP/OUP
+            if lib == 'algoma' and 'Cambridge' in publisher:
+                continue
+
             data = options['settings'].get_settings(lib)
             subs = get_subfields(field, data)
             eight_five_six = pymarc.Field(tag = '856',