Ebooks script code cleanup
author Dan Scott <dscott@laurentian.ca>
Thu, 4 Oct 2012 18:45:56 +0000 (14:45 -0400)
committer Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:58:06 +0000 (14:58 -0400)
Lots of accumulated whitespace / line length code convention issues.
This makes pylint happier (from 8.65 to 9.44).

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/ebooks/prep_ebook_records.py

index 7ce07e1..14de27f 100644 (file)
@@ -51,7 +51,7 @@ class Institution():
             "link_text": "Disponible en ligne", \
             "access_note": u"Accès réservé aux utilisateurs avec un ID valide Collège Boréal ;" \
         }
-        
+
         self.laurentian = { \
             "code": "LUSYS", \
             "lac_symbol": "OSUL", \
@@ -79,7 +79,6 @@ class Institution():
     def get_settings(self, lib):
         """Return the settings for a library by name"""
         return getattr(self, lib)
-    
 
 def do_help():
     '''
@@ -125,7 +124,7 @@ Required arguments:
 
     -P / --platform: The name of the digital platform to be inserted in a 710
                      field.
+
     -A / --algoma: Add an 856 for Algoma University
 
     -B / --boreal: Add an 856 for College Boreal
@@ -265,11 +264,13 @@ def check_options(options):
     return clean_opts
 
 def evergreen_request(method, *args, **kwargs):
+    """Issue a basic gateway request against Evergreen"""
+
     service = '.'.join(method.split('.')[:2])
     kwargs.update({'service':service, 'method':method})
-    params =  ['%s=%s' % (k,quote(v)) for k,v in kwargs.items()]
+    params =  ['%s=%s' % (k, quote(v)) for k, v in kwargs.items()]
     params += ['param=%s' % quote(json.dumps(a)) for a in args]
-    url = '%s?%s' % (GATEWAY_URL, '&'.join(params)) 
+    url = '%s?%s' % (GATEWAY_URL, '&'.join(params))
     #print '--->', url
     req = urllib2.urlopen(url)
     resp = json.load(req)
@@ -280,6 +281,8 @@ def evergreen_request(method, *args, **kwargs):
     return payload
 
 def url_check(record, options):
+    """Check for a matching URL in Evergreen"""
+
     global DUP_COUNT, RECORD_COUNT
 
     match = False
@@ -305,13 +308,15 @@ def url_check(record, options):
     return match_id, match
 
 def tcn_check(record):
+    """Check for a matching TCN in Evergreen"""
+
     global DUP_COUNT, RECORD_COUNT
 
     match = False
     match_id = 0
     for tcn in record.get_fields('001'):
         tcn_val = tcn.value()
-        tcn_info = evergreen_request(OPENSRF_TCN_CALL,tcn_val)
+        tcn_info = evergreen_request(OPENSRF_TCN_CALL, tcn_val)
         bib_ids = tcn_info[0]['ids']
         # print "tcn_info", tcn_info
         for bib_id in bib_ids:
@@ -323,6 +328,8 @@ def tcn_check(record):
     return match_id, match
 
 def isbn_check(record):
+    """Check for a matching ISBN in Evergreen"""
+
     global DUP_COUNT, RECORD_COUNT
 
     match = False
@@ -330,8 +337,7 @@ def isbn_check(record):
     for isbn in record.get_fields('020', '024'):
         for isbnval in isbn.get_subfields('a', 'z'):
             isbn_val = clean_isbn(isbnval)
-            isbn_info = evergreen_request(OPENSRF_ISBN_CALL,isbnval)
-            match_count = isbn_info[0]['count']
+            isbn_info = evergreen_request(OPENSRF_ISBN_CALL, isbnval)
             #print "count", isbn_info[0]['count']
             bib_ids = isbn_info[0]['ids']
             for bib_id in bib_ids:
@@ -354,8 +360,8 @@ def append_period(text):
 
 def check_libraries(options):
     """Build a dict of the libraries that were requested for this batch"""
-    
-    _libraries = dict() 
+
+    _libraries = dict()
     for lib in ['algoma', 'boreal', 'laurentian', 'windsor']:
         if '--' + lib in options:
             _libraries[lib] = True
@@ -367,17 +373,18 @@ def parse_opts():
     """Get command-line arguments from the script"""
     try:
         _short_opts = 'i:o:a:c:p:P:ABLWe:d:t:u:n:s:h'
-        _long_opts = ['input=', 'output=', 'authorization=', 'consortium=', 
-            'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian', 'windsor', 'ebrary',
-            'duplicate=', 'tcn=', 'url=', 'note=','sample=', 'help'
+        _long_opts = ['input=', 'output=', 'authorization=', 'consortium=',
+            'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian',
+            'windsor', 'ebrary', 'duplicate=', 'tcn=', 'url=', 'note=',
+            'sample=', 'help'
         ]
-        opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) 
+        opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
     except getopt.GetoptError, ex:
         print "* %s" % str(ex)
         do_help()
 
     _options = consolidate_options(opts[0])
-    return check_options(_options)    
+    return check_options(_options)
 
 def process_records(options):
     """Converts raw ebook MARC records to Conifer-ready MARC records"""
@@ -400,24 +407,24 @@ def process_records(options):
         writer = pymarc.MARCWriter(open(options['output'], mode='wb'))
     except Exception, ex:
         print("Could not open output file [%s]" % options['output'])
-        
+
     if 'duplicate' in options:
-       try:
+        try:
             duplicate = pymarc.MARCWriter(open(options['duplicate'], mode='wb'))
-       except Exception, ex:
-           print("Could not open output file [%s]" % options['duplicate'])
+        except Exception, ex:
+            print("Could not open output file [%s]" % options['duplicate'])
 
     if 'tcn' in options:
-       try:
+        try:
             tcn = pymarc.MARCWriter(open(options['tcn'], mode='wb'))
-       except Exception, ex:
-           print("Could not open output file [%s]" % options['tcn'])
+        except Exception, ex:
+            print("Could not open output file [%s]" % options['tcn'])
 
     if 'url' in options:
-       try:
+        try:
             url = pymarc.MARCWriter(open(options['url'], mode='wb'))
-       except Exception, ex:
-           print("Could not open output file [%s]" % options['url'])
+        except Exception, ex:
+            print("Could not open output file [%s]" % options['url'])
 
     if 'sample' in options:
         sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
@@ -430,36 +437,38 @@ def process_records(options):
                         % (RECORD_COUNT, options['input'])
                 )
             else:
-                print ("%d - %s\n" % (RECORD_COUNT,record['856']))
+                print ("%d - %s\n" % (RECORD_COUNT, record['856']))
 
 
             new_record = ''
             dup_flag = False
 
             if duplicate:
-               bib_id, dup_flag = isbn_check(record)
-               new_record = process_fields(record, options, bib_id, dup_flag)
-               if dup_flag:
-                  duplicate.write(new_record)
+                bib_id, dup_flag = isbn_check(record)
+                new_record = process_fields(record, options, bib_id, dup_flag)
+                if dup_flag:
+                    duplicate.write(new_record)
             if tcn:
-               bib_id, dup_flag = tcn_check(record)
-               new_record = process_fields(record, options, bib_id, dup_flag)
-               if dup_flag:
-                  tcn.write(new_record)
+                bib_id, dup_flag = tcn_check(record)
+                new_record = process_fields(record, options, bib_id, dup_flag)
+                if dup_flag:
+                    tcn.write(new_record)
             if url:
-               bib_id, dup_flag = url_check(record, options)
-               new_record = process_fields(record, options, bib_id, dup_flag)
-               if dup_flag:
-                  url.write(new_record)
+                bib_id, dup_flag = url_check(record, options)
+                new_record = process_fields(record, options, bib_id, dup_flag)
+                if dup_flag:
+                    url.write(new_record)
 
             if not dup_flag:
                 new_record = process_fields(record, options, 0, False)
             else:
                 DUP_COUNT += 1
-            
+
             if new_record:
                 writer.write(new_record)
-                if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))):
+                if (sample and (
+                    (RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0)
+                )):
                     sample.write(new_record)
         except Exception, ex:
             print("* Error processing record %d - %s" % (RECORD_COUNT, ex))
@@ -484,7 +493,7 @@ def process_fields(record, options, bib_id, dup_flag):
         record.add_field(note)
 
     # 909
-    if dup_flag: 
+    if dup_flag:
         dup_value = bib_id + ""
         dup = pymarc.Field(tag = '909',
             indicators = [' ', ' '],
@@ -663,7 +672,8 @@ def clean_diacritics(field):
         global RECORD_COUNT
         if r'\x' in repr(tmpsf):
             print " * %d Hex value found in %s:%s - [%s] [%s]" % (
-                RECORD_COUNT, field.tag, subfield[0], tmpsf.encode('utf8'), repr(tmpsf)
+                RECORD_COUNT, field.tag, subfield[0],
+                tmpsf.encode('utf8'), repr(tmpsf)
             )
 
         if (repr(subfield[1]) != repr(tmpsf)):
@@ -672,7 +682,6 @@ def clean_diacritics(field):
             )
 
     return new_field
-        
 
 def add_publisher(record, options):
     """
@@ -703,7 +712,7 @@ def add_publisher(record, options):
         for pub in sten.get_subfields('a'):
             if pub == publisher:
                 munge_publisher = True
-                for rel in sten.get_subfields('4'): 
+                for rel in sten.get_subfields('4'):
                     if rel == 'pbl':
                         need_publisher = False
                         need_relator = False
@@ -752,7 +761,7 @@ def add_platform(record, options):
             ]
         )
         record.add_field(seven_ten)
+
 def mark_isbn_for_sfx(record, options):
     """
     Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups
@@ -784,7 +793,7 @@ def mark_isbn_for_sfx(record, options):
                 indicators = ['8', ' '],
                 subfields = [
                     'a', scn.value(),
-                    '9', 'SFX' 
+                    '9', 'SFX'
                 ]
             )
 
@@ -842,11 +851,11 @@ def clean_isbn(isbn):
     isbn = isbn.strip()
 
     # Grab the first string beginning with a digit
-    isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn) 
+    isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn)
 
     if not isbn_match.group(1):
         return None
-     
+
     # Replace hyphens
     isbn = isbn_match.group(1).replace('-', '')
 
@@ -859,7 +868,7 @@ def add_restriction(new_record, options, publisher):
     The 506 field includes the following subfields:
       * $a - Standard text to display
       * $b - Jurisdiction (identifies the consortial license)
-      * $e - Authorization (online platform that enforces authorization) 
+      * $e - Authorization (online platform that enforces authorization)
       * $9 - Institutional code to which this note applies
     """
 
@@ -928,7 +937,7 @@ def add_marc_source(record, options):
     """
 
     global RECORD_COUNT
+
     source = os.path.basename(options['input'])
 
     marc_source = pymarc.Field(tag = '598',
@@ -966,7 +975,7 @@ def process_urls(field, options, publisher):
             subs = get_subfields(field, data)
             eight_five_six = pymarc.Field(tag = '856',
                 indicators = ['4', '0'],
-                subfields = subs 
+                subfields = subs
             )
             new_fields.append(eight_five_six)
 
@@ -1064,18 +1073,20 @@ def get_subfields(field, data):
     ebrary = False
     if url.find('.ebrary.com') > -1:
         ebrary = True
-        
+
     # ebrary URLs look like: http://site.ebrary.com/lib/<channel>/Doc?id=2001019
     # we need to replace <channel> with the library-specific channel
     if ebrary:
-        ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url) 
+        ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url)
         url = ebrary_url.group(1) + data['ebrary_code'] + ebrary_url.group(2)
 
     # Only Boreal still wants proxied ebrary links
     if ebrary and data['ebrary_code'] != 'ocls':
         subs.extend(['u', url])
     else:
-        if data['ebrary_code'] == 'ocls' and re.search(r'ra.ocls.ca', field['u']):
+        if (data['ebrary_code'] == 'ocls' and 
+            re.search(r'ra.ocls.ca', field['u'])
+        ):
             subs.extend(['u', field['u']])
         else:
             subs.extend(['u', data['proxy'] + field['u']])