be accommodated in batch load.
"""
-import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, json
-import codecs, copy
+import os
+import os.path
+import sys
+import getopt
+import pymarc
+import re
+import json
+import codecs
+import copy
+import traceback
import requests
from datetime import date
from bs4 import BeautifulSoup
RECORD_COUNT = 0
DUP_COUNT = 0
-GATEWAY_URL = "http://www.concat.ca/osrf-gateway-v1"
+GATEWAY_URL = "https://www.concat.ca/osrf-gateway-v1"
OPENSRF_ISBN_CALL = "open-ils.search.biblio.isbn"
OPENSRF_TCN_CALL = "open-ils.search.biblio.tcn"
OPENSRF_KEYWORD_CALL = "open-ils.search.biblio.multiclass.query"
OPTIONS = {}
FILES = {}
-class Institution():
+
+class Institution:
"""Defines standard settings for each Conifer institution"""
def __init__(self):
"""Initialize the Institution object"""
- self.algoma = { \
- "code": "ALGOMASYS", \
- "lac_symbol": "OSTMA", \
- "org_unit": "111", \
- "ebrary_code": "algomauca", \
- "proxy": "http://libproxy.auc.ca/login?url=", \
- "link_text": "Available online", \
- "sfx_url": "http://sfx.scholarsportal.info/algoma", \
- "access_note": "Access restricted to users with a valid Algoma University ID ;" \
+ self.algoma = {
+ "code": "ALGOMASYS",
+ "lac_symbol": "OSTMA",
+ "org_unit": "111",
+ "ebrary_code": "algomauca",
+ "proxy": "http://libproxy.auc.ca/login?url=",
+ "link_text": "Available online",
+ "sfx_url": "http://sfx.scholarsportal.info/algoma",
+ "access_note": "Access restricted to users with a valid Algoma University ID ;",
}
- self.boreal = { \
- "code": "BOREALSYS", \
- "lac_symbol": "BOREALSYS", \
- "org_unit": "135", \
- "ebrary_code": "ocls", \
- "proxy": "http://ra.ocls.ca/ra/login.aspx?url=", \
- "link_text": "Disponible en ligne", \
- "access_note": u"Accès réservé aux utilisateurs avec un ID valide Collège Boréal ;" \
+ self.boreal = {
+ "code": "BOREALSYS",
+ "lac_symbol": "BOREALSYS",
+ "org_unit": "135",
+ "ebrary_code": "ocls",
+ "proxy": "http://ra.ocls.ca/ra/login.aspx?url=",
+ "link_text": "Disponible en ligne",
+ "access_note": u"Accès réservé aux utilisateurs avec un ID valide Collège Boréal ;",
}
- self.laurentian = { \
- "code": "LUSYS", \
- "lac_symbol": "OSUL", \
- "org_unit": "105", \
- "ebrary_code": "jndlu", \
- "gale_code": "sudb78095", \
- "proxy": "https://login.librweb.laurentian.ca/login?url=", \
- "link_text": "Available online / disponible en ligne", \
- "sfx_url": "https://sfx.scholarsportal.info/laurentian", \
- "access_note": "Access restricted to users with a valid Laurentian University ID ;" \
+ self.laurentian = {
+ "code": "LUSYS",
+ "lac_symbol": "OSUL",
+ "org_unit": "105",
+ "ebrary_code": "jndlu",
+ "gale_code": "sudb78095",
+ "proxy": "https://login.librweb.laurentian.ca/login?url=",
+ "link_text": "Available online / disponible en ligne",
+ "sfx_url": "https://sfx.scholarsportal.info/laurentian",
+ "access_note": "Access restricted to users with a valid Laurentian University ID ;",
}
def get_settings(self, lib):
"""Return the settings for a library by name"""
return getattr(self, lib)
+
def do_help():
- '''
+ """
Print help for the Conifer ebook MARC processor
- '''
+ """
- print('''
+ print(
+ """
Conifer ebook MARC processor
This script takes a set of MARC records and processes them to generate a set
Examples:
%s --algoma -i crkn.mrc -o /tmp/crkn_out.mrc -p "eBrary Inc."
- ''' % (sys.argv[0],))
+ """
+ % (sys.argv[0],)
+ )
sys.exit(0)
+
def consolidate_options(opts):
"""Make long arguments the standard form in command line options"""
_shortlong = {
- '-i': '--input',
- '-o': '--output',
- '-a': '--authorization',
- '-c': '--consortium',
- '-C': '--clean',
- '-d': '--duplicate',
- '-e': '--ebrary',
- '-F': '--from-format',
- '-I': '--isbn-sfx',
- '-p': '--publisher',
- '-P': '--platform',
- '-n': '--note',
- '-t': '--tcn',
- '-T': '--to-format',
- '-u': '--url',
- '-A': '--algoma',
- '-B': '--boreal',
- '-L': '--laurentian',
- '-s': '--sample',
- '-x': '--cut-field',
- '-h': '--help'
+ "-i": "--input",
+ "-o": "--output",
+ "-a": "--authorization",
+ "-c": "--consortium",
+ "-C": "--clean",
+ "-d": "--duplicate",
+ "-e": "--ebrary",
+ "-F": "--from-format",
+ "-I": "--isbn-sfx",
+ "-p": "--publisher",
+ "-P": "--platform",
+ "-n": "--note",
+ "-t": "--tcn",
+ "-T": "--to-format",
+ "-u": "--url",
+ "-A": "--algoma",
+ "-B": "--boreal",
+ "-L": "--laurentian",
+ "-s": "--sample",
+ "-x": "--cut-field",
+ "-h": "--help",
}
_options = dict(opts)
return _options
+
def check_options(options):
"""Check the validity of options that were passed in"""
_help = False
_req = {
- '--input': "* Missing -i / --input argument!",
- '--output': "* Missing -o / --output argument!",
- '--authorization': "* Missing -a / --authorization argument!",
- '--publisher': "* Missing -p / --publisher argument!"
+ "--input": "* Missing -i / --input argument!",
+ "--output": "* Missing -o / --output argument!",
+ "--authorization": "* Missing -a / --authorization argument!",
+ "--publisher": "* Missing -p / --publisher argument!",
}
- if '--help' in options:
+ if "--help" in options:
do_help()
for reqkey, reqwarn in _req.items():
_help = True
_libraries = check_libraries(options)
- if len(_libraries.keys()) == 0:
+    if not _libraries:
_help = True
- if _help == True:
+ if _help:
do_help()
# Get the input and output files
- _input = options['--input']
- _output = options['--output']
+ _input = options["--input"]
+ _output = options["--output"]
try:
os.stat(_input)
sys.exit(0)
_bool_opts = {
- '--clean': 'clean',
- '--ebrary': 'ebrary',
- '--isbn-sfx': 'isbn-sfx',
+ "--clean": "clean",
+ "--ebrary": "ebrary",
+ "--isbn-sfx": "isbn-sfx",
}
_string_opts = {
- '--authorization': 'authorization',
- '--consortium': 'consortium',
- '--duplicate': 'duplicate',
- '--from-format': 'from-format',
- '--note': 'note',
- '--platform': 'platform',
- '--sample': 'sample',
- '--cut-field': 'cut-field',
- '--tcn': 'tcn',
- '--to-format': 'to-format',
- '--url': 'url',
+ "--authorization": "authorization",
+ "--consortium": "consortium",
+ "--duplicate": "duplicate",
+ "--from-format": "from-format",
+ "--note": "note",
+ "--platform": "platform",
+ "--sample": "sample",
+ "--cut-field": "cut-field",
+ "--tcn": "tcn",
+ "--to-format": "to-format",
+ "--url": "url",
}
clean_opts = dict()
- clean_opts['publisher'] = append_period(options['--publisher'])
+ clean_opts["publisher"] = append_period(options["--publisher"])
for optkey, optval in _bool_opts.items():
if optkey in options:
-            if optkey in options:
-                clean_opts[optval] = options[optkey]
+            clean_opts[optval] = options[optkey]
- clean_opts['libraries'] = _libraries
- clean_opts['input'] = _input
- clean_opts['output'] = _output
- clean_opts['settings'] = Institution()
+ clean_opts["libraries"] = _libraries
+ clean_opts["input"] = _input
+ clean_opts["output"] = _output
+ clean_opts["settings"] = Institution()
return clean_opts
+
def evergreen_request(method, *args, **kwargs):
"""Issue a basic gateway request against Evergreen"""
-
- service = '.'.join(method.split('.')[:2])
- kwargs.update({'service':service, 'method':method})
- params = ['%s=%s' % (k, quote(v)) for k, v in kwargs.items()]
- params += ['param=%s' % quote(json.dumps(a)) for a in args]
- url = '%s?%s' % (GATEWAY_URL, '&'.join(params))
- #print '--->', url
+ from urllib.parse import quote
+
+ service = ".".join(method.split(".")[:2])
+ kwargs.update({"service": service, "method": method})
+ params = ["%s=%s" % (k, quote(v)) for k, v in kwargs.items()]
+ params += ["param=%s" % quote(json.dumps(a)) for a in args]
+ url = "%s?%s" % (GATEWAY_URL, "&".join(params))
+ # print '--->', url
req = requests.get(url)
resp = req.json()
- if resp['status'] != 200:
- raise Exception('error during evergreen request', resp)
- payload = resp['payload']
- #print '<---', payload
+ if resp["status"] != 200:
+ raise Exception("error during evergreen request", resp)
+ payload = resp["payload"]
+ # print '<---', payload
return payload
+
def url_check(record, options):
"""Check for a matching URL in Evergreen"""
match = False
match_id = 0
# Oxford MARC files from ScholarsPortal have DOIs in 956(!)
- for url in record.get_fields('856','956'):
- for urlval in url.get_subfields('u'):
+ for url in record.get_fields("856", "956"):
+ for urlval in url.get_subfields("u"):
# print "urlval", urlval
- for library in options['libraries']:
- libopts = options['settings'].get_settings(library)
- keyword_info = evergreen_request(OPENSRF_KEYWORD_CALL,
- {'org_unit': libopts['org_unit'],
- 'depth': 1, 'limit': 5, 'offset': 0,
- 'visibility_limit': 3000,
- 'default_class': 'keyword'},
- urlval, 1)
- bib_ids = keyword_info[0]['ids']
+ for library in options["libraries"]:
+ libopts = options["settings"].get_settings(library)
+ keyword_info = evergreen_request(
+ OPENSRF_KEYWORD_CALL,
+ {
+ "org_unit": libopts["org_unit"],
+ "depth": 1,
+ "limit": 5,
+ "offset": 0,
+ "visibility_limit": 3000,
+ "default_class": "keyword",
+ },
+ urlval,
+ 1,
+ )
+ bib_ids = keyword_info[0]["ids"]
for bib_id in bib_ids:
match_id = bib_id
- print("* %d of %d - URL match on %s for %s"
- % (DUP_COUNT + 1, RECORD_COUNT, urlval, bib_id[0])
+ print(
+ "* %d of %d - URL match on %s for %s"
+ % (DUP_COUNT + 1, RECORD_COUNT, urlval, bib_id[0])
)
match = True
return match_id, match
+
def tcn_check(record):
"""Check for a matching TCN in Evergreen"""
match = False
match_id = 0
- for tcn in record.get_fields('001'):
+ for tcn in record.get_fields("001"):
tcn_val = tcn.value()
tcn_info = evergreen_request(OPENSRF_TCN_CALL, tcn_val)
- bib_ids = tcn_info[0]['ids']
+ bib_ids = tcn_info[0]["ids"]
# print "tcn_info", tcn_info
for bib_id in bib_ids:
match_id = bib_id
- print("* %d of %d - TCN match on %s for %s"
+ print(
+ "* %d of %d - TCN match on %s for %s"
% (DUP_COUNT + 1, RECORD_COUNT, tcn_val, bib_id)
)
match = True
return match_id, match
+
def isbn_check(record):
"""Check for a matching ISBN in Evergreen"""
match = False
match_id = 0
- for isbn in record.get_fields('020', '024'):
- for isbnval in isbn.get_subfields('a', 'z'):
+ for isbn in record.get_fields("020", "024"):
+ for isbnval in isbn.get_subfields("a", "z"):
isbn_val = clean_isbn(isbnval)
-            isbn_info = evergreen_request(OPENSRF_ISBN_CALL, isbnval)
+            isbn_info = evergreen_request(OPENSRF_ISBN_CALL, isbn_val)
- #print "count", isbn_info[0]['count']
- bib_ids = isbn_info[0]['ids']
+ # print "count", isbn_info[0]['count']
+ bib_ids = isbn_info[0]["ids"]
for bib_id in bib_ids:
match_id = bib_id
- print("* %d of %d - ISBN match on %s for %s"
+ print(
+ "* %d of %d - ISBN match on %s for %s"
% (DUP_COUNT + 1, RECORD_COUNT, isbn_val, bib_id)
)
match = True
return match_id, match
+
def append_period(text):
"""
Append a period to the incoming text if required
"""
- if text[-1] != '.':
- text += '.'
+ if text[-1] != ".":
+ text += "."
return text
+
def check_libraries(options):
"""Build a dict of the libraries that were requested for this batch"""
_libraries = dict()
- for lib in ['algoma', 'boreal', 'laurentian']:
- if '--' + lib in options:
+ for lib in ["algoma", "boreal", "laurentian"]:
+ if "--" + lib in options:
_libraries[lib] = True
return _libraries
def parse_opts():
"""Get command-line arguments from the script"""
try:
- _short_opts = 'i:o:a:p:P:ABLc:eCId:F:T:t:u:n:s:x:h'
- _long_opts = ['input=', 'output=', 'authorization=',
- 'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian',
- 'consortium=', 'ebrary', 'clean', 'isbn-sfx', 'duplicate=',
- 'from-format=', 'to-format=', 'tcn=', 'url=', 'note=', 'sample=',
- 'cut-field=', 'help'
+ _short_opts = "i:o:a:p:P:ABLc:eCId:F:T:t:u:n:s:x:h"
+ _long_opts = [
+ "input=",
+ "output=",
+ "authorization=",
+ "publisher=",
+ "platform=",
+ "algoma",
+ "boreal",
+ "laurentian",
+ "consortium=",
+ "ebrary",
+ "clean",
+ "isbn-sfx",
+ "duplicate=",
+ "from-format=",
+ "to-format=",
+ "tcn=",
+ "url=",
+ "note=",
+ "sample=",
+ "cut-field=",
+ "help",
]
opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
except getopt.GetoptError as ex:
_options = consolidate_options(opts[0])
return check_options(_options)
+
def process_marc(options):
"""Converts raw ebook MARC records to Conifer-ready MARC records"""
global FILES
files = FILES
- if 'from-format' in options and options['from-format'] == 'xml':
- pymarc.map_xml(process_xml, options['input'])
+ if "from-format" in options and options["from-format"] == "xml":
+ pymarc.map_xml(process_xml, options["input"])
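+        # pymarc.map_xml() parses the MARCXML file and calls process_xml()
+        # once per record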
else:
try:
reader = pymarc.MARCReader(
- open(options['input'], mode='rb'), to_unicode=True
+ open(options["input"], mode="rb"), to_unicode=True
)
except Exception as ex:
- print("Could not open input file [%s]" % options['input'])
+ print("Could not open input file [%s]" % options["input"])
for record in reader:
process_record(record, options, files)
+
def process_record(record, options, files):
global RECORD_COUNT
global DUP_COUNT
RECORD_COUNT += 1
try:
- if not (record['856'] and record['856']['u']):
- print("* No 856 for record # %s in file %s"
- % (RECORD_COUNT, options['input'])
+ if not (record["856"] and record["856"]["u"]):
+ print(
+ "* No 856 for record # %s in file %s" % (RECORD_COUNT, options["input"])
)
else:
- print("%d - %s" % (RECORD_COUNT, record['856']))
+ print("%d - %s" % (RECORD_COUNT, record["856"]))
dupe_flags = {}
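+            # Dedupe cascade: ISBN first, then TCN, then URL. A record that
+            # matches is tagged with a 909 and written to that check's own
+            # file; only unmatched records fall through to the main output.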
- if 'duplicate' in files:
+ if "duplicate" in files:
tmp_record = process_fields(copy.deepcopy(record), options)
- bib_id, dupe_flags['isbn'] = isbn_check(tmp_record)
- if dupe_flags['isbn']:
+ bib_id, dupe_flags["isbn"] = isbn_check(tmp_record)
+ if dupe_flags["isbn"]:
tmp_record = add_dupe_field(tmp_record, bib_id)
- files['duplicate'].write(tmp_record)
+ files["duplicate"].write(tmp_record)
else:
- del(dupe_flags['isbn'])
+ del dupe_flags["isbn"]
- if 'tcn' in files and len(dupe_flags) == 0:
+ if "tcn" in files and not dupe_flags:
tmp_record = process_fields(copy.deepcopy(record), options)
- bib_id, dupe_flags['tcn'] = tcn_check(tmp_record)
- if dupe_flags['tcn']:
+ bib_id, dupe_flags["tcn"] = tcn_check(tmp_record)
+ if dupe_flags["tcn"]:
tmp_record = add_dupe_field(tmp_record, bib_id)
- files['tcn'].write(tmp_record)
+ files["tcn"].write(tmp_record)
else:
- del(dupe_flags['tcn'])
+ del dupe_flags["tcn"]
- if 'url' in files and len(dupe_flags) == 0:
+ if "url" in files and not dupe_flags:
tmp_record = process_fields(copy.deepcopy(record), options)
- bib_id, dupe_flags['url'] = url_check(tmp_record, options)
- if dupe_flags['url']:
+ bib_id, dupe_flags["url"] = url_check(tmp_record, options)
+ if dupe_flags["url"]:
tmp_record = add_dupe_field(tmp_record, bib_id)
- files['url'].write(tmp_record)
+ files["url"].write(tmp_record)
else:
- del(dupe_flags['url'])
+ del dupe_flags["url"]
- if len(dupe_flags):
+ if dupe_flags:
DUP_COUNT += 1
else:
new_record = process_fields(record, options)
- if 'to-format' in options and options['to-format'] == 'xml':
- new_record = pymarc.record_to_xml(new_record) + '\n'
- files['output'].write(new_record)
- if ('sample' in files and (
- (RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0)
- )):
- files['sample'].write(new_record)
+ if "to-format" in options and options["to-format"] == "xml":
+ new_record = pymarc.record_to_xml(new_record) + "\n"
+ files["output"].write(new_record)
+ if "sample" in files and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0)):
+ files["sample"].write(new_record)
except Exception as ex:
print("* Error processing record %d - %s" % (RECORD_COUNT, ex))
traceback.print_exc()
+
def process_fields(record, options):
"""Decide which fields to add, delete, and keep"""
new_record = pymarc.Record(to_unicode=True, force_utf8=True)
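+    # Force Leader/06 (type of record) to "a": language material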
- leader = record.leader[:6] + 'a' + record.leader[7:]
+ leader = record.leader[:6] + "a" + record.leader[7:]
new_record.leader = leader
- add_cat_source(record, options) # 040
+ add_cat_source(record, options) # 040
# 590
- if 'note' in options:
- note_value = options['note']
- note = pymarc.Field(tag = '590',
- indicators = [' ', ' '],
- subfields = [
- 'a', note_value
- ]
+ if "note" in options:
+ note_value = options["note"]
+ note = pymarc.Field(
+ tag="590", indicators=[" ", " "], subfields=["a", note_value]
)
record.add_ordered_field(note)
- add_marc_source(record, options) # 598
- if record.get_fields('336') is None:
- add_rda_fields(record, options) # 336,337,338
- publisher = add_publisher(record, options) # 710
- add_restriction(record, options, publisher) # 506
- add_platform(record, options) # 710
+ add_marc_source(record, options) # 598
+    if not record.get_fields("336"):
+        add_rda_fields(record)  # 336,337,338
+ publisher = add_publisher(record, options) # 710
+ add_restriction(record, options, publisher) # 506
+ add_platform(record, options) # 710
- if 'isbn-sfx' in options:
+ if "isbn-sfx" in options:
marked_isbn = mark_isbn_for_sfx(record, options)
for field in record.get_fields():
- if 'clean' in options:
+ if "clean" in options:
field = clean_diacritics(field)
# Process all of the 856 fields
# Oxford MARC files from ScholarsPortal have DOIs in 956(!)
- if field.tag == '856' or field.tag == '956':
+ if field.tag == "856" or field.tag == "956":
new_fields = process_urls(field, options, publisher)
if new_fields:
for new_856 in new_fields:
new_record.add_ordered_field(new_856)
# Strip out 9xx fields: we don't want local fields in our records
# except for 924 fields that we create
- elif field.tag[0] == '9' and field.tag != '924':
+ elif field.tag[0] == "9" and field.tag != "924":
pass
# ISBN cleanup
- elif field.tag == '020':
+ elif field.tag == "020":
new_isbn = create_clean_isbn(field)
new_record.add_ordered_field(new_isbn)
# Strip out 300 fields that only contain placeholders
- elif field.tag == '300' and field['a'] == 'p. cm.':
+ elif field.tag == "300" and field["a"] == "p. cm.":
pass
# Add relator URIs
- elif field.tag.startswith('33') and field['0'] is None:
+ elif field.tag.startswith("33") and field["0"] is None:
field = add_relator_uri(field)
new_record.add_ordered_field(field)
# Strip out useless fields
- elif 'cut-field' in options and field.tag in options['cut-field']:
+ elif "cut-field" in options and field.tag in options["cut-field"]:
pass
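+        # Force 008/23 (form of item) to "s": electronic resource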
- elif field.tag == '008' and field.value()[23] != 's':
+ elif field.tag == "008" and field.value()[23] != "s":
fixed_field = pymarc.Field(
- tag='008',
- data=field.value()[:23] + 's' + field.value()[24:]
+ tag="008", data=field.value()[:23] + "s" + field.value()[24:]
)
new_record.add_ordered_field(fixed_field)
# Strip out GMD
- elif field.tag == '245':
- if 'h' in field:
- suffix = field['h'][-3:]
- field.delete_subfield('h')
- field['a'] = field['a'] + suffix
+ elif field.tag == "245":
+ if "h" in field:
+ # Grab the trailing " /"
+ suffix = field["h"][-2:]
+ field.delete_subfield("h")
+ field["a"] = field["a"] + suffix
new_record.add_ordered_field(field)
else:
new_record.add_ordered_field(field)
- if 'isbn-sfx' in options and not marked_isbn:
+ if "isbn-sfx" in options and not marked_isbn:
try:
- isbn = record['020']['a']
- print("ISBN: [%s] - no matching ISBN target found in SFX for %s" %
- (isbn, new_record['856']['u'])
+ isbn = record["020"]["a"]
+ print(
+ "ISBN: [%s] - no matching ISBN target found in SFX for %s"
+ % (isbn, new_record["856"]["u"])
)
-    except:
+    except Exception:
- print("No matching ISBN target found in SFX for %s" %
- (new_record['856']['u'])
+ print(
+ "No matching ISBN target found in SFX for %s" % (new_record["856"]["u"])
)
return new_record
+
def add_dupe_field(record, bib_id):
"""Add a 909 field marking the duplicate record"""
dup_value = str(bib_id)
- dup = pymarc.Field(tag = '909',
- indicators = [' ', ' '],
- subfields = [
- 'a', dup_value
- ]
- )
+ dup = pymarc.Field(tag="909", indicators=[" ", " "], subfields=["a", dup_value])
record.add_ordered_field(dup)
return record
+
def clean_diacritics(field):
"""
Change specific patterns of bytes into other patterns of bytes
return field
new_field = pymarc.Field(
- tag=field.tag,
- indicators=[field.indicator1, field.indicator2]
+ tag=field.tag, indicators=[field.indicator1, field.indicator2]
)
for subfield in field:
- if r'\x' not in repr(subfield[1]):
+ if r"\x" not in repr(subfield[1]):
new_field.add_subfield(subfield[0], subfield[1])
continue
# Let the substitutions commence - maybe move to a map table?
# COMBINING MACRON
- tmpsf = subfield[1].replace(u'\xd5A', u'A\u0304')
- tmpsf = tmpsf.replace(u'\xd5a', u'a\u0304')
- tmpsf = tmpsf.replace(u'\xd5E', u'E\u0304')
- tmpsf = tmpsf.replace(u'\xd5e', u'e\u0304')
- tmpsf = tmpsf.replace(u'\xd5I', u'I\u0304')
- tmpsf = tmpsf.replace(u'\xd5i', u'i\u0304')
- tmpsf = tmpsf.replace(u'\xd5O', u'O\u0304')
- tmpsf = tmpsf.replace(u'\xd5o', u'o\u0304')
- tmpsf = tmpsf.replace(u'\xd5U', u'U\u0304')
- tmpsf = tmpsf.replace(u'\xd5u', u'u\u0304')
+ tmpsf = subfield[1].replace(u"\xd5A", u"A\u0304")
+ tmpsf = tmpsf.replace(u"\xd5a", u"a\u0304")
+ tmpsf = tmpsf.replace(u"\xd5E", u"E\u0304")
+ tmpsf = tmpsf.replace(u"\xd5e", u"e\u0304")
+ tmpsf = tmpsf.replace(u"\xd5I", u"I\u0304")
+ tmpsf = tmpsf.replace(u"\xd5i", u"i\u0304")
+ tmpsf = tmpsf.replace(u"\xd5O", u"O\u0304")
+ tmpsf = tmpsf.replace(u"\xd5o", u"o\u0304")
+ tmpsf = tmpsf.replace(u"\xd5U", u"U\u0304")
+ tmpsf = tmpsf.replace(u"\xd5u", u"u\u0304")
# LATIN LETTER C WITH ACUTE
- tmpsf = tmpsf.replace(u'\xd4C', u'\u0106')
- tmpsf = tmpsf.replace(u'\xd4c', u'\u0107')
+ tmpsf = tmpsf.replace(u"\xd4C", u"\u0106")
+ tmpsf = tmpsf.replace(u"\xd4c", u"\u0107")
# LATIN LETTER L WITH STROKE
- tmpsf = tmpsf.replace(u'\u00b0', u'\u0141')
+ tmpsf = tmpsf.replace(u"\u00b0", u"\u0141")
- lstroke = tmpsf.find(u'\00b1')
- if lstroke and tmpsf[lstroke + 1] == 'i':
+    lstroke = tmpsf.find(u"\u00b1")
+    if lstroke > -1 and tmpsf[lstroke + 1 : lstroke + 2] == "i":
# Modifier prime instead
- tmpsf = tmpsf.replace(u'\u00b1', u'\u02b9')
+ tmpsf = tmpsf.replace(u"\u00b1", u"\u02b9")
else:
- tmpsf = tmpsf.replace(u'\u00b1', u'\u0142')
+ tmpsf = tmpsf.replace(u"\u00b1", u"\u0142")
# COMBINING MODIFIER LETTER HALF RING
- tmpsf = tmpsf.replace(u'\xb1', u'\u02be')
+ tmpsf = tmpsf.replace(u"\xb1", u"\u02be")
# COMBINING TILDE
- tmpsf = tmpsf.replace(u'\xf5n', u'n\u0303')
+ tmpsf = tmpsf.replace(u"\xf5n", u"n\u0303")
# COMBINING CEDILLA
- tmpsf = tmpsf.replace(u'\xb0c', u'c\u0327')
- tmpsf = tmpsf.replace(u'\u01afS', u'S\u0327')
- tmpsf = tmpsf.replace(u'\u01afs', u's\u0327')
+ tmpsf = tmpsf.replace(u"\xb0c", u"c\u0327")
+ tmpsf = tmpsf.replace(u"\u01afS", u"S\u0327")
+ tmpsf = tmpsf.replace(u"\u01afs", u"s\u0327")
# S WITH COMBINING ACUTE ACCENT
- tmpsf = tmpsf.replace(u'\xd4S', u'\u015a')
- tmpsf = tmpsf.replace(u'\xd4s', u'\u015b')
+ tmpsf = tmpsf.replace(u"\xd4S", u"\u015a")
+ tmpsf = tmpsf.replace(u"\xd4s", u"\u015b")
# A CARON
- tmpsf = tmpsf.replace(u'\xdaA', u'\u0100')
- tmpsf = tmpsf.replace(u'\xdaa', u'\u0101')
+ tmpsf = tmpsf.replace(u"\xdaA", u"\u0100")
+ tmpsf = tmpsf.replace(u"\xdaa", u"\u0101")
# C CARON
- tmpsf = tmpsf.replace(u'\xdaC', u'\u010c')
- tmpsf = tmpsf.replace(u'\xdac', u'\u010d')
+ tmpsf = tmpsf.replace(u"\xdaC", u"\u010c")
+ tmpsf = tmpsf.replace(u"\xdac", u"\u010d")
# R CARON
- tmpsf = tmpsf.replace(u'\xdaR', u'\u0158')
- tmpsf = tmpsf.replace(u'\xdar', u'\u0159')
+ tmpsf = tmpsf.replace(u"\xdaR", u"\u0158")
+ tmpsf = tmpsf.replace(u"\xdar", u"\u0159")
# E BREVE
- tmpsf = tmpsf.replace(u'\xe6E', u'\u0114')
- tmpsf = tmpsf.replace(u'\xe6e', u'\u0115')
+ tmpsf = tmpsf.replace(u"\xe6E", u"\u0114")
+ tmpsf = tmpsf.replace(u"\xe6e", u"\u0115")
# S CARON
- tmpsf = tmpsf.replace(u'\xdaS', u'\u0160')
- tmpsf = tmpsf.replace(u'\xdas', u'\u0161')
+ tmpsf = tmpsf.replace(u"\xdaS", u"\u0160")
+ tmpsf = tmpsf.replace(u"\xdas", u"\u0161")
# U CARON
- tmpsf = tmpsf.replace(u'\u00e6U', u'\u01d3')
- tmpsf = tmpsf.replace(u'\u00e6u', u'\u01d4')
+ tmpsf = tmpsf.replace(u"\u00e6U", u"\u01d3")
+ tmpsf = tmpsf.replace(u"\u00e6u", u"\u01d4")
# G BREVE
- tmpsf = tmpsf.replace(u'\xe6G', u'\u011e')
- tmpsf = tmpsf.replace(u'\xe6g', u'\u011f')
+ tmpsf = tmpsf.replace(u"\xe6G", u"\u011e")
+ tmpsf = tmpsf.replace(u"\xe6g", u"\u011f")
# I BREVE
- tmpsf = tmpsf.replace(u'\xe6I', u'\u012c')
- tmpsf = tmpsf.replace(u'\xe6i', u'\u012d')
+ tmpsf = tmpsf.replace(u"\xe6I", u"\u012c")
+ tmpsf = tmpsf.replace(u"\xe6i", u"\u012d")
# COMBINING DOT ABOVE
- tmpsf = tmpsf.replace(u'\xfeI', u'I\u0307')
+ tmpsf = tmpsf.replace(u"\xfeI", u"I\u0307")
# COMBINING LIGATURE LEFT HALF
- tmpsf = tmpsf.replace(u'\xd9i', u'i\ufe20')
- tmpsf = tmpsf.replace(u'\xd9I', u'I\ufe20')
- tmpsf = tmpsf.replace(u'\xd9t', u't\ufe20')
+ tmpsf = tmpsf.replace(u"\xd9i", u"i\ufe20")
+ tmpsf = tmpsf.replace(u"\xd9I", u"I\ufe20")
+ tmpsf = tmpsf.replace(u"\xd9t", u"t\ufe20")
# COMBINING LIGATURE RIGHT HALF
- tmpsf = tmpsf.replace(u'\xfda', u'a\ufe21')
- tmpsf = tmpsf.replace(u'\xfds', u's\ufe21')
- tmpsf = tmpsf.replace(u'\xfdU', u'U\ufe21')
+ tmpsf = tmpsf.replace(u"\xfda", u"a\ufe21")
+ tmpsf = tmpsf.replace(u"\xfds", u"s\ufe21")
+ tmpsf = tmpsf.replace(u"\xfdU", u"U\ufe21")
# MODIFIER LETTER PRIME
- tmpsf = tmpsf.replace(u'\xf0', u'\u02b9')
+ tmpsf = tmpsf.replace(u"\xf0", u"\u02b9")
# LATIN SMALL LETTER DOTLESS I
- tmpsf = tmpsf.replace(u'\u00a9', u'\u0131')
+ tmpsf = tmpsf.replace(u"\u00a9", u"\u0131")
# LATIN LETTER E WITH DOT ABOVE
- tmpsf = tmpsf.replace(u'\u00feE', u'\u0116')
- tmpsf = tmpsf.replace(u'\u00fee', u'\u0117')
+ tmpsf = tmpsf.replace(u"\u00feE", u"\u0116")
+ tmpsf = tmpsf.replace(u"\u00fee", u"\u0117")
new_field.add_subfield(subfield[0], tmpsf)
global RECORD_COUNT
- if r'\x' in repr(tmpsf):
- print(" * %d Hex value found in %s:%s - [%s] [%s]" % (
- RECORD_COUNT, field.tag, subfield[0],
- tmpsf.encode('utf8'), repr(tmpsf)
- ))
+ if r"\x" in repr(tmpsf):
+ print(
+ " * %d Hex value found in %s:%s - [%s] [%s]"
+ % (
+ RECORD_COUNT,
+ field.tag,
+ subfield[0],
+ tmpsf.encode("utf8"),
+ repr(tmpsf),
+ )
+ )
- if (repr(subfield[1]) != repr(tmpsf)):
- print("* %d\tOld: [%s]\tNew: [%s]" % (
- RECORD_COUNT, subfield[1].encode('utf8'), tmpsf.encode('utf8')
- ))
+ if repr(subfield[1]) != repr(tmpsf):
+ print(
+ "* %d\tOld: [%s]\tNew: [%s]"
+ % (RECORD_COUNT, subfield[1].encode("utf8"), tmpsf.encode("utf8"))
+ )
return new_field
+
def add_publisher(record, options):
"""
This is a convoluted way to avoid creating a new 710 if we already
have a matching 710 and just need to add the publisher relator code.
"""
- publisher = options['publisher']
+ publisher = options["publisher"]
munge_publisher = False
need_publisher = True
raw_publisher = None
try:
- raw_publisher = record['260']['b']
+ raw_publisher = record["260"]["b"]
-    except:
+    except Exception:
pass
if raw_publisher:
- if 'Oxford' in raw_publisher or 'Clarendon' in raw_publisher:
- publisher = 'Oxford University Press.'
- elif 'Cambridge' in raw_publisher:
- publisher = 'Cambridge University Press.'
+ if "Oxford" in raw_publisher or "Clarendon" in raw_publisher:
+ publisher = "Oxford University Press."
+ elif "Cambridge" in raw_publisher:
+ publisher = "Cambridge University Press."
# Iterate through all of the existing 710 fields
- for sten in record.get_fields('710'):
- for pub in sten.get_subfields('a'):
+ for sten in record.get_fields("710"):
+ for pub in sten.get_subfields("a"):
if pub == publisher:
munge_publisher = True
- for rel in sten.get_subfields('4'):
- if rel == 'pbl':
+ for rel in sten.get_subfields("4"):
+ if rel == "pbl":
uri_for_relator = True
need_publisher = False
need_relator = False
if munge_publisher:
if need_relator:
- sten.add_subfield('4', 'http://id.loc.gov/vocabulary/relators/pbl')
+ sten.add_subfield(
+ "4", "http://id.loc.gov/vocabulary/relators/pbl"
+ )
elif uri_for_relator:
- sten['4'] = 'http://id.loc.gov/vocabulary/relators/pbl'
+ sten["4"] = "http://id.loc.gov/vocabulary/relators/pbl"
need_publisher = False
if need_publisher:
# Add the publisher, with relator code
- seven_ten = pymarc.Field(tag = '710',
- indicators = ['2', ' '],
- subfields = [
- 'a', publisher,
- '4', 'http://id.loc.gov/vocabulary/relators/pbl'
- ]
+ seven_ten = pymarc.Field(
+ tag="710",
+ indicators=["2", " "],
+ subfields=[
+ "a",
+ publisher,
+ "4",
+ "http://id.loc.gov/vocabulary/relators/pbl",
+ ],
)
record.add_ordered_field(seven_ten)
return publisher
+
def add_platform(record, options):
"""
This is a convoluted way to avoid creating a new 710 if we already
have a matching 710 for digital platform.
"""
- if not 'platform' in options:
+ if not "platform" in options:
return False
- platform = options['platform']
+ platform = options["platform"]
need_platform = True
# Iterate through all of the existing 710 fields
- for sten in record.get_fields('710'):
- for pub in sten.get_subfields('a'):
- if pub == platform or (pub == platform + '.'):
+ for sten in record.get_fields("710"):
+ for pub in sten.get_subfields("a"):
+ if pub == platform or (pub == platform + "."):
need_platform = False
if need_platform:
# Add the platform
- seven_ten = pymarc.Field(tag = '710',
- indicators = ['2', ' '],
- subfields = [
- 'a', platform
- ]
+ seven_ten = pymarc.Field(
+ tag="710", indicators=["2", " "], subfields=["a", platform]
)
record.add_ordered_field(seven_ten)
+
def mark_isbn_for_sfx(record, options):
"""
Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups
"""
# For every ISBN in the record
- for isbn in record.get_fields('020', '024'):
- for isbnval in isbn.get_subfields('a', 'z'):
+ for isbn in record.get_fields("020", "024"):
+ for isbnval in isbn.get_subfields("a", "z"):
isbnval = clean_isbn(isbnval)
# And for every library we have enabled
- for lib in options['libraries']:
- if lib == 'boreal':
+ for lib in options["libraries"]:
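+                # Boréal has no sfx_url configured, so skip SFX marking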
+ if lib == "boreal":
return False
found = check_for_isbn(options, lib, isbnval)
if found:
# Add the $9 subfield to mark this as a good one
- isbn.add_subfield('9', 'SFX')
+ isbn.add_subfield("9", "SFX")
return True
# For ebrary records, add a 924 for the custom URN
- if 'ebrary' in options:
+ if "ebrary" in options:
urn = None
- for scn in record.get_fields('001'):
- urn = pymarc.Field(tag = '924',
- indicators = ['8', ' '],
- subfields = [
- 'a', scn.value(),
- '9', 'SFX'
- ]
+ for scn in record.get_fields("001"):
+ urn = pymarc.Field(
+ tag="924",
+ indicators=["8", " "],
+ subfields=["a", scn.value(), "9", "SFX"],
)
if urn is not None:
return False
+
def check_for_isbn(options, lib, isbnval):
"""
Given an ISBN value, check SFX at the specified library for a match
"""
- sfx = options['settings'].get_settings(lib)['sfx_url']
- url = "%s?url_ver=Z39.88-2004&url_ctx_fmt=infofi/fmt:kev:mtx:ctx&" \
- "ctx_enc=UTF-8&ctx_ver=Z39.88-2004&rfr_id=info:sid/evergreen&" \
- "sfx.ignore_date_threshold=1&" \
- "sfx.response_type=multi_obj_detailed_xml" \
+ sfx = options["settings"].get_settings(lib)["sfx_url"]
+ url = (
+ "%s?url_ver=Z39.88-2004&url_ctx_fmt=infofi/fmt:kev:mtx:ctx&"
+ "ctx_enc=UTF-8&ctx_ver=Z39.88-2004&rfr_id=info:sid/evergreen&"
+ "sfx.ignore_date_threshold=1&"
+ "sfx.response_type=multi_obj_detailed_xml"
"&__service_type=getFullTxt&rft.isbn=%s" % (sfx, isbnval)
+ )
- headers = {'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0'}
+ headers = {
+ "user-agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0"
+ }
req = requests.get(url, headers=headers)
try:
req.raise_for_status()
sfx_res = BeautifulSoup(req.text, "html.parser")
# We want a target with a service_type element of 'getFullTxt'
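+        # The multi_obj_detailed_xml response is assumed to look roughly like
+        #   <ctx_obj><ctx_obj_targets><target><service_type>getFullTxt</...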
- targets = sfx_res.ctx_obj.ctx_obj_targets.findAll(
- 'target', recursive=False
- )
+ targets = sfx_res.ctx_obj.ctx_obj_targets.findAll("target", recursive=False)
if len(targets) == 0:
# No SFX targets found for this ISBN - next!
return False
for target in targets:
- if target.service_type.renderContents() == 'getFullTxt':
+ if target.service_type.renderContents() == "getFullTxt":
return True
return False
+
def clean_isbn(isbn):
"""
Return a normalized ISBN from a MARC subfield
isbn = isbn.strip()
# Grab the first string beginning with a digit
- isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn)
+ isbn_match = re.search(r"^[\D]*([\d]+\S+).*?$", isbn)
-    if not isbn_match.group(1):
+    if not isbn_match or not isbn_match.group(1):
return None
# Replace hyphens
- isbn = isbn_match.group(1).replace('-', '')
+ isbn = isbn_match.group(1).replace("-", "")
return isbn
+
def add_restriction(new_record, options, publisher):
"""
Adds a 506 access restriction note per institution
"""
# Add a period if the authorization ends with a number or letter
- authnote = options['authorization']
- if authnote[-1] not in '.)]':
- authnote += '.'
+ authnote = options["authorization"]
+ if authnote[-1] not in ".)]":
+ authnote += "."
- for library in options['libraries']:
+ for library in options["libraries"]:
# Skip auth note if Algoma + CUP
- if library == 'algoma' and 'Cambridge' in publisher:
+ if library == "algoma" and "Cambridge" in publisher:
continue
- libopts = options['settings'].get_settings(library)
+ libopts = options["settings"].get_settings(library)
# Add the access restriction note
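+        # 506 subfields: $a terms governing access, $b jurisdiction,
+        # $e authorization, $9 institution symbol (local)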
- if 'consortium' in options:
+ if "consortium" in options:
subfields = [
- 'a', append_space_semi_space(libopts['access_note']),
- 'b', append_space_semi_space(options['consortium']),
- 'e', authnote,
- '9', libopts['lac_symbol']
+ "a",
+ append_space_semi_space(libopts["access_note"]),
+ "b",
+ append_space_semi_space(options["consortium"]),
+ "e",
+ authnote,
+ "9",
+ libopts["lac_symbol"],
]
else:
subfields = [
- 'a', append_space_semi_space(libopts['access_note']),
- 'e', authnote,
- '9', libopts['lac_symbol']
+ "a",
+ append_space_semi_space(libopts["access_note"]),
+ "e",
+ authnote,
+ "9",
+ libopts["lac_symbol"],
]
- note = pymarc.Field(tag = '506',
- indicators = ['1', ' '],
- subfields = subfields
- )
+ note = pymarc.Field(tag="506", indicators=["1", " "], subfields=subfields)
new_record.add_ordered_field(note)
+
def append_space_semi_space(note):
"""
Try to ensure the given text ends with ' ; '
"""
- if note[-3:] == ' ; ':
+ if note[-3:] == " ; ":
pass
- elif note[-1] == ';':
- note += ' '
- elif note[-1] == ' ':
- note += '; '
+ elif note[-1] == ";":
+ note += " "
+ elif note[-1] == " ":
+ note += "; "
else:
- note += ' ; '
+ note += " ; "
return note
+
def add_cat_source(record, options):
"""Add or extend the 040 field to identify the cataloguing source"""
# Only do this for Laurentian
- if 'laurentian' not in options['libraries']:
+ if "laurentian" not in options["libraries"]:
return
- cat_source = record['040']
+ cat_source = record["040"]
if cat_source:
# Add subfield 'd' identifying Laurentian
- cat_source.add_subfield('d', 'CaOSUL')
+ cat_source.add_subfield("d", "CaOSUL")
else:
# Add a 040 with subfield 'd' identifying Laurentian
- forty = pymarc.Field(tag = '040',
- indicators = [' ', ' '],
- subfields = [ 'd', 'CaOSUL' ]
+ forty = pymarc.Field(
+ tag="040", indicators=[" ", " "], subfields=["d", "CaOSUL"]
)
record.add_ordered_field(forty)
+
def add_relator_uri(field):
"""
Add URIs to RDA 33x fields
"""
- if 'b' not in field:
+ if "b" not in field:
pass
- elif field.tag == '336':
- field.add_subfield('0', 'http://id.loc.gov/vocabulary/contentTypes/' + field['b'])
- elif field.tag == '337':
- field.add_subfield('0', 'http://id.loc.gov/vocabulary/mediaTypes/' + field['b'])
- elif field.tag == '338':
- field.add_subfield('0', 'http://id.loc.gov/vocabulary/carriers/' + field['b'])
+ elif field.tag == "336":
+ field.add_subfield(
+ "0", "http://id.loc.gov/vocabulary/contentTypes/" + field["b"]
+ )
+ elif field.tag == "337":
+ field.add_subfield("0", "http://id.loc.gov/vocabulary/mediaTypes/" + field["b"])
+ elif field.tag == "338":
+ field.add_subfield("0", "http://id.loc.gov/vocabulary/carriers/" + field["b"])
return field
+
def add_rda_fields(record):
"""
Add 336,337,338 fields identifying the content as an ebook
"""
- content = pymarc.Field(tag = '336',
- indicators = [' ', ' '],
- subfields = [
- 'a', 'text',
- 'b', 'txt',
- '2', 'rdacontent',
- '0', 'http://id.loc.gov/vocabulary/contentTypes/txt'
- ]
+ content = pymarc.Field(
+ tag="336",
+ indicators=[" ", " "],
+ subfields=[
+ "a",
+ "text",
+ "b",
+ "txt",
+ "2",
+ "rdacontent",
+ "0",
+ "http://id.loc.gov/vocabulary/contentTypes/txt",
+ ],
)
- media = pymarc.Field(tag = '337',
- indicators = [' ', ' '],
- subfields = [
- 'a', 'computer',
- 'b', 'c',
- '2', 'rdamedia',
- '0', 'http://id.loc.gov/vocabulary/mediaTypes/c'
- ]
+ media = pymarc.Field(
+ tag="337",
+ indicators=[" ", " "],
+ subfields=[
+ "a",
+ "computer",
+ "b",
+ "c",
+ "2",
+ "rdamedia",
+ "0",
+ "http://id.loc.gov/vocabulary/mediaTypes/c",
+ ],
)
- carrier = pymarc.Field(tag = '338',
- indicators = [' ', ' '],
- subfields = [
- 'a', 'online resource',
- 'b', 'cr',
- '2', 'rdacarrier',
- '0', 'http://id.loc.gov/vocabulary/carriers/cr'
- ]
+ carrier = pymarc.Field(
+ tag="338",
+ indicators=[" ", " "],
+ subfields=[
+ "a",
+ "online resource",
+ "b",
+ "cr",
+ "2",
+ "rdacarrier",
+ "0",
+ "http://id.loc.gov/vocabulary/carriers/cr",
+ ],
)
record.add_ordered_field(content)
record.add_ordered_field(media)
record.add_ordered_field(carrier)
+
def add_marc_source(record, options):
"""
Add a 598 field identifying the source MARC file name and processing date
global RECORD_COUNT
- source = os.path.basename(options['input'])
+ source = os.path.basename(options["input"])
- marc_source = pymarc.Field(tag = '598',
- indicators = [' ', ' '],
- subfields = [
- 'a', source,
- 'b', date.today().isoformat(),
- 'c', str(RECORD_COUNT)
- ]
+ marc_source = pymarc.Field(
+ tag="598",
+ indicators=[" ", " "],
+ subfields=["a", source, "b", date.today().isoformat(), "c", str(RECORD_COUNT)],
)
record.add_ordered_field(marc_source)
+
def create_clean_isbn(field):
"""Move 020a junk to 020q"""
-
- if not field.get_subfields('a') or ' ' not in field['a']:
+
+ if not field.get_subfields("a") or " " not in field["a"]:
return field
- isbn = pymarc.Field(
- tag = '020',
- indicators=[field.indicator1, field.indicator2]
- )
+ isbn = pymarc.Field(tag="020", indicators=[field.indicator1, field.indicator2])
for sf in field:
- if sf[0] == 'a' and ' ' in sf[1]:
+ if sf[0] == "a" and " " in sf[1]:
junk = sf[1].strip()
- junk = junk[junk.find(' '):].strip()
- isbn.add_subfield('a', clean_isbn(sf[1]))
- isbn.add_subfield('q', junk)
+ junk = junk[junk.find(" ") :].strip()
+ isbn.add_subfield("a", clean_isbn(sf[1]))
+ isbn.add_subfield("q", junk)
else:
isbn.add_subfield(sf[0], sf[1])
return isbn
+
def process_urls(field, options, publisher):
"""Creates 856 fields required by Conifer"""
new_fields = []
- if not field['u']:
+ if not field["u"]:
print("* No subfield 'u' found in this 856")
return None
# If we have a ToC or author notes or whatever, replace with content
- if field['u'].find('.loc.gov') > -1:
+ if field["u"].find(".loc.gov") > -1:
enrich = substitute_content(field)
if enrich and isinstance(enrich, pymarc.field.Field):
new_fields.append(enrich)
else:
- for lib in options['libraries']:
+ for lib in options["libraries"]:
# Tweak for Algoma for combined CUP/OUP
- if lib == 'algoma' and 'Cambridge' in publisher:
+ if lib == "algoma" and "Cambridge" in publisher:
continue
- data = options['settings'].get_settings(lib)
+ data = options["settings"].get_settings(lib)
- platform = options['platform']
- if field['u'].find('books.scholarsportal') > -1:
- platform = 'ScholarsPortal'
+            # "platform" is optional, so avoid a KeyError when it is absent
+            platform = options.get("platform")
+ if field["u"].find("books.scholarsportal") > -1:
+ platform = "ScholarsPortal"
subs = get_subfields(field, data, platform)
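+            # 856 ind1=4 (HTTP), ind2=0 (link points to the resource itself)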
- eight_five_six = pymarc.Field(tag = '856',
- indicators = ['4', '0'],
- subfields = subs
+ eight_five_six = pymarc.Field(
+ tag="856", indicators=["4", "0"], subfields=subs
)
new_fields.append(eight_five_six)
return new_fields
+
def substitute_content(field):
"""Parses a ToC or author notes URL and generates a field"""
- url = field['u']
+ url = field["u"]
content_field = None
- raw_content = ''
+ raw_content = ""
# Skip machine-generated tables of contents
- if url.find('/toc/') > -1:
+ if url.find("/toc/") > -1:
return None
# Get the data from the supplied URL
- headers = {'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0'}
+ headers = {
+ "user-agent": "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0"
+ }
req = requests.get(url, headers=headers)
try:
req.raise_for_status()
if not content:
return None
- if url.endswith('-b.html'):
- # Biographical note
+ if url.endswith("-b.html"):
+ # Biographical note
content_field = pymarc.Field(
- tag = '545',
- indicators = ['1', ' '],
- subfields = ['a', content]
+ tag="545", indicators=["1", " "], subfields=["a", content]
)
- elif url.endswith('-d.html'):
- # Summary written by publisher
+ elif url.endswith("-d.html"):
+ # Summary written by publisher
content_field = pymarc.Field(
- tag = '520',
- indicators = ['3', ' '],
- subfields = ['a', content]
+ tag="520", indicators=["3", " "], subfields=["a", content]
)
- elif url.endswith('-t.html'):
- # Table of contents
+ elif url.endswith("-t.html"):
+ # Table of contents
content_field = pymarc.Field(
- tag = '505',
- indicators = [' ', ' '],
- subfields = ['a', content]
+ tag="505", indicators=[" ", " "], subfields=["a", content]
)
else:
print("URL %s didn't match known LoC type" % (url))
return content_field
+
def process_loc_data(raw_content):
"""Given the LoC enriched data, make it usable"""
# Short-circuit if we have an OCRed ToC; the quality is terrible
- if raw_content.find(text='Electronic data is machine generated'):
+ if raw_content.find(text="Electronic data is machine generated"):
return None
- elif raw_content.find('<pre>'):
+ elif raw_content.find("<pre>"):
return None
# Get all of the text after the horizontal rule
- content = ' '.join(
- raw_content.find('hr').findAllNext(text=True)
- )
+ content = " ".join(raw_content.find("hr").findAllNext(text=True))
# Remove linefeeds
- content = content.replace('\n', ' ')
- content = content.replace('\r', ' ')
+ content = content.replace("\n", " ")
+ content = content.replace("\r", " ")
# Replace multiple contiguous whitespace with a single space
- content = re.sub(r'\s+', r' ', content)
+ content = re.sub(r"\s+", r" ", content)
# Remove inline subject headings to avoid too much indexing boost
- lcsh = content.find('Library of Congress subject headings')
+ lcsh = content.find("Library of Congress subject headings")
if lcsh > -1:
content = content[0:lcsh]
return content
+
def get_subfields(field, data, platform):
"""Creates 856 subfields required by Conifer"""
subs = []
- url = field['u']
+ url = field["u"]
# Is this an ebrary URL?
ebrary = False
- if url.find('.ebrary.com') > -1:
+ if url.find(".ebrary.com") > -1:
ebrary = True
# ebrary URLs look like: http://site.ebrary.com/lib/<channel>/Doc?id=2001019
# we need to replace <channel> with the library-specific channel
if ebrary:
- ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url)
- url = ebrary_url.group(1) + data['ebrary_code'] + ebrary_url.group(2)
+ ebrary_url = re.search(r"^(.+?/lib/).+?(/.+?)$", url)
+ url = ebrary_url.group(1) + data["ebrary_code"] + ebrary_url.group(2)
# Only Boreal still wants proxied ebrary links
- if ebrary and data['ebrary_code'] != 'ocls':
- subs.extend(['u', url])
+ if ebrary and data["ebrary_code"] != "ocls":
+ subs.extend(["u", url])
else:
- if (data['ebrary_code'] == 'ocls' and
- re.search(r'ra.ocls.ca', field['u'])
- ):
- subs.extend(['u', field['u']])
+ if data["ebrary_code"] == "ocls" and re.search(r"ra.ocls.ca", field["u"]):
+ subs.extend(["u", field["u"]])
else:
- subs.extend(['u', data['proxy'] + field['u']])
+ subs.extend(["u", data["proxy"] + field["u"]])
# Check for a $z as the first 856; in Springer records, at least, this
# indicates a multi-volume set that requires keeping the $z around
- if field.subfields[0] == 'z' and (
+ if field.subfields[0] == "z" and (
# However, we don't want to keep garbage-y public notes
- not field.get_subfields('z')[0].startswith('Connect to MyiLibrary')
+ not field.get_subfields("z")[0].startswith("Connect to MyiLibrary")
):
subs.extend([field.subfields[0], field.subfields[1]])
- link_text = data['link_text']
+ link_text = data["link_text"]
# We don't know what the 956 platform is
- if platform and field.tag != '956':
- link_text = "%s (%s)" % (data['link_text'], platform)
- elif url.find('springer.com') > -1:
- link_text = "%s (%s)" % (data['link_text'], 'Springer')
+ if platform and field.tag != "956":
+ link_text = "%s (%s)" % (data["link_text"], platform)
+ elif url.find("springer.com") > -1:
+ link_text = "%s (%s)" % (data["link_text"], "Springer")
else:
- link_text = "%s" % (data['link_text'])
- subs.extend([
- 'y', link_text,
- '9', data['code']
- ])
+ link_text = "%s" % (data["link_text"])
+ subs.extend(["y", link_text, "9", data["code"]])
return subs
+
def process_xml(record):
global OPTIONS
global FILES
files = FILES
-    process_record(record, options, files)
+    process_record(record, OPTIONS, files)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
OPTIONS = parse_opts()
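+    # Open a writer per requested output stream; MARCXML output goes through
+    # a codecs UTF-8 stream, binary MARC through pymarc.MARCWriter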
- for fname in ('duplicate', 'tcn', 'url', 'sample', 'output'):
+ for fname in ("duplicate", "tcn", "url", "sample", "output"):
if fname in OPTIONS:
try:
- if 'to-format' in OPTIONS and OPTIONS['to-format'] == 'xml':
- FILES[fname] = codecs.open(OPTIONS[fname], 'wb', 'utf-8')
+ if "to-format" in OPTIONS and OPTIONS["to-format"] == "xml":
+ FILES[fname] = codecs.open(OPTIONS[fname], "wb", "utf-8")
else:
- FILES[fname] = pymarc.MARCWriter(open(OPTIONS[fname], 'wb'))
+ FILES[fname] = pymarc.MARCWriter(open(OPTIONS[fname], "wb"))
except Exception as ex:
print("Could not open output file [%s]: %s" % (OPTIONS[fname], ex))
process_marc(OPTIONS)
- #pymarc.map_xml(process_xml, '/home/dan/Downloads/AlexanderStreetPress_JazzMusicLibrary_Canada_MONOSER_2012-05-23.xml')
+ # pymarc.map_xml(process_xml, '/home/dan/Downloads/AlexanderStreetPress_JazzMusicLibrary_Canada_MONOSER_2012-05-23.xml')