Ebooks script code cleanup
author Dan Scott <dscott@laurentian.ca>
Thu, 4 Oct 2012 18:45:56 +0000 (14:45 -0400)
committer Dan Scott <dscott@laurentian.ca>
Tue, 7 May 2013 18:58:06 +0000 (14:58 -0400)
Lots of accumulated whitespace / line length code convention issues.
This makes pylint happier (from 8.65 to 9.44).

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/ebooks/prep_ebook_records.py

index 7ce07e1..14de27f 100644 (file)
@@ -51,7 +51,7 @@ class Institution():
             "link_text": "Disponible en ligne", \
             "access_note": u"Accès réservé aux utilisateurs avec un ID valide Collège Boréal ;" \
         }
-        
+
         self.laurentian = { \
             "code": "LUSYS", \
             "lac_symbol": "OSUL", \
@@ -79,7 +79,6 @@ class Institution():
     def get_settings(self, lib):
         """Return the settings for a library by name"""
         return getattr(self, lib)
-    
 
 def do_help():
     '''
@@ -125,7 +124,7 @@ Required arguments:
 
     -P / --platform: The name of the digital platform to be inserted in a 710
                      field.
+
     -A / --algoma: Add an 856 for Algoma University
 
     -B / --boreal: Add an 856 for College Boreal
@@ -265,11 +264,13 @@ def check_options(options):
     return clean_opts
 
 def evergreen_request(method, *args, **kwargs):
+    """Issue a basic gateway request against Evergreen"""
+
     service = '.'.join(method.split('.')[:2])
     kwargs.update({'service':service, 'method':method})
-    params =  ['%s=%s' % (k,quote(v)) for k,v in kwargs.items()]
+    params =  ['%s=%s' % (k, quote(v)) for k, v in kwargs.items()]
     params += ['param=%s' % quote(json.dumps(a)) for a in args]
-    url = '%s?%s' % (GATEWAY_URL, '&'.join(params)) 
+    url = '%s?%s' % (GATEWAY_URL, '&'.join(params))
     #print '--->', url
     req = urllib2.urlopen(url)
     resp = json.load(req)
@@ -280,6 +281,8 @@ def evergreen_request(method, *args, **kwargs):
     return payload
 
 def url_check(record, options):
+    """Check for a matching URL in Evergreen"""
+
     global DUP_COUNT, RECORD_COUNT
 
     match = False
@@ -305,13 +308,15 @@ def url_check(record, options):
     return match_id, match
 
 def tcn_check(record):
+    """Check for a matching TCN in Evergreen"""
+
     global DUP_COUNT, RECORD_COUNT
 
     match = False
     match_id = 0
     for tcn in record.get_fields('001'):
         tcn_val = tcn.value()
-        tcn_info = evergreen_request(OPENSRF_TCN_CALL,tcn_val)
+        tcn_info = evergreen_request(OPENSRF_TCN_CALL, tcn_val)
         bib_ids = tcn_info[0]['ids']
         # print "tcn_info", tcn_info
         for bib_id in bib_ids:
@@ -323,6 +328,8 @@ def tcn_check(record):
     return match_id, match
 
 def isbn_check(record):
+    """Check for a matching ISBN in Evergreen"""
+
     global DUP_COUNT, RECORD_COUNT
 
     match = False
@@ -330,8 +337,7 @@ def isbn_check(record):
     for isbn in record.get_fields('020', '024'):
         for isbnval in isbn.get_subfields('a', 'z'):
             isbn_val = clean_isbn(isbnval)
-            isbn_info = evergreen_request(OPENSRF_ISBN_CALL,isbnval)
-            match_count = isbn_info[0]['count']
+            isbn_info = evergreen_request(OPENSRF_ISBN_CALL, isbnval)
             #print "count", isbn_info[0]['count']
             bib_ids = isbn_info[0]['ids']
             for bib_id in bib_ids:
@@ -354,8 +360,8 @@ def append_period(text):
 
 def check_libraries(options):
     """Build a dict of the libraries that were requested for this batch"""
-    
-    _libraries = dict() 
+
+    _libraries = dict()
     for lib in ['algoma', 'boreal', 'laurentian', 'windsor']:
         if '--' + lib in options:
             _libraries[lib] = True
@@ -367,17 +373,18 @@ def parse_opts():
     """Get command-line arguments from the script"""
     try:
         _short_opts = 'i:o:a:c:p:P:ABLWe:d:t:u:n:s:h'
-        _long_opts = ['input=', 'output=', 'authorization=', 'consortium=', 
-            'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian', 'windsor', 'ebrary',
-            'duplicate=', 'tcn=', 'url=', 'note=','sample=', 'help'
+        _long_opts = ['input=', 'output=', 'authorization=', 'consortium=',
+            'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian',
+            'windsor', 'ebrary', 'duplicate=', 'tcn=', 'url=', 'note=',
+            'sample=', 'help'
         ]
-        opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) 
+        opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
     except getopt.GetoptError, ex:
         print "* %s" % str(ex)
         do_help()
 
     _options = consolidate_options(opts[0])
-    return check_options(_options)    
+    return check_options(_options)
 
 def process_records(options):
     """Converts raw ebook MARC records to Conifer-ready MARC records"""
@@ -400,24 +407,24 @@ def process_records(options):
         writer = pymarc.MARCWriter(open(options['output'], mode='wb'))
     except Exception, ex:
         print("Could not open output file [%s]" % options['output'])
-        
+
     if 'duplicate' in options:
-       try:
+        try:
             duplicate = pymarc.MARCWriter(open(options['duplicate'], mode='wb'))
-       except Exception, ex:
-           print("Could not open output file [%s]" % options['duplicate'])
+        except Exception, ex:
+            print("Could not open output file [%s]" % options['duplicate'])
 
     if 'tcn' in options:
-       try:
+        try:
             tcn = pymarc.MARCWriter(open(options['tcn'], mode='wb'))
-       except Exception, ex:
-           print("Could not open output file [%s]" % options['tcn'])
+        except Exception, ex:
+            print("Could not open output file [%s]" % options['tcn'])
 
     if 'url' in options:
-       try:
+        try:
             url = pymarc.MARCWriter(open(options['url'], mode='wb'))
-       except Exception, ex:
-           print("Could not open output file [%s]" % options['url'])
+        except Exception, ex:
+            print("Could not open output file [%s]" % options['url'])
 
     if 'sample' in options:
         sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
@@ -430,36 +437,38 @@ def process_records(options):
                         % (RECORD_COUNT, options['input'])
                 )
             else:
-                print ("%d - %s\n" % (RECORD_COUNT,record['856']))
+                print ("%d - %s\n" % (RECORD_COUNT, record['856']))
 
 
             new_record = ''
             dup_flag = False
 
             if duplicate:
-               bib_id, dup_flag = isbn_check(record)
-               new_record = process_fields(record, options, bib_id, dup_flag)
-               if dup_flag:
-                  duplicate.write(new_record)
+                bib_id, dup_flag = isbn_check(record)
+                new_record = process_fields(record, options, bib_id, dup_flag)
+                if dup_flag:
+                    duplicate.write(new_record)
             if tcn:
-               bib_id, dup_flag = tcn_check(record)
-               new_record = process_fields(record, options, bib_id, dup_flag)
-               if dup_flag:
-                  tcn.write(new_record)
+                bib_id, dup_flag = tcn_check(record)
+                new_record = process_fields(record, options, bib_id, dup_flag)
+                if dup_flag:
+                    tcn.write(new_record)
             if url:
-               bib_id, dup_flag = url_check(record, options)
-               new_record = process_fields(record, options, bib_id, dup_flag)
-               if dup_flag:
-                  url.write(new_record)
+                bib_id, dup_flag = url_check(record, options)
+                new_record = process_fields(record, options, bib_id, dup_flag)
+                if dup_flag:
+                    url.write(new_record)
 
             if not dup_flag:
                 new_record = process_fields(record, options, 0, False)
             else:
                 DUP_COUNT += 1
-            
+
             if new_record:
                 writer.write(new_record)
-                if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))):
+                if (sample and (
+                    (RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0)
+                )):
                     sample.write(new_record)
         except Exception, ex:
             print("* Error processing record %d - %s" % (RECORD_COUNT, ex))
@@ -484,7 +493,7 @@ def process_fields(record, options, bib_id, dup_flag):
         record.add_field(note)
 
     # 909
-    if dup_flag: 
+    if dup_flag:
         dup_value = bib_id + ""
         dup = pymarc.Field(tag = '909',
             indicators = [' ', ' '],
@@ -663,7 +672,8 @@ def clean_diacritics(field):
         global RECORD_COUNT
         if r'\x' in repr(tmpsf):
             print " * %d Hex value found in %s:%s - [%s] [%s]" % (
-                RECORD_COUNT, field.tag, subfield[0], tmpsf.encode('utf8'), repr(tmpsf)
+                RECORD_COUNT, field.tag, subfield[0],
+                tmpsf.encode('utf8'), repr(tmpsf)
             )
 
         if (repr(subfield[1]) != repr(tmpsf)):
@@ -672,7 +682,6 @@ def clean_diacritics(field):
             )
 
     return new_field
-        
 
 def add_publisher(record, options):
     """
@@ -703,7 +712,7 @@ def add_publisher(record, options):
         for pub in sten.get_subfields('a'):
             if pub == publisher:
                 munge_publisher = True
-                for rel in sten.get_subfields('4'): 
+                for rel in sten.get_subfields('4'):
                     if rel == 'pbl':
                         need_publisher = False
                         need_relator = False
@@ -752,7 +761,7 @@ def add_platform(record, options):
             ]
         )
         record.add_field(seven_ten)
+
 def mark_isbn_for_sfx(record, options):
     """
     Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups
@@ -784,7 +793,7 @@ def mark_isbn_for_sfx(record, options):
                 indicators = ['8', ' '],
                 subfields = [
                     'a', scn.value(),
-                    '9', 'SFX' 
+                    '9', 'SFX'
                 ]
             )
 
@@ -842,11 +851,11 @@ def clean_isbn(isbn):
     isbn = isbn.strip()
 
     # Grab the first string beginning with a digit
-    isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn) 
+    isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn)
 
     if not isbn_match.group(1):
         return None
-     
+
     # Replace hyphens
     isbn = isbn_match.group(1).replace('-', '')
 
@@ -859,7 +868,7 @@ def add_restriction(new_record, options, publisher):
     The 506 field includes the following subfields:
       * $a - Standard text to display
       * $b - Jurisdiction (identifies the consortial license)
-      * $e - Authorization (online platform that enforces authorization) 
+      * $e - Authorization (online platform that enforces authorization)
       * $9 - Institutional code to which this note applies
     """
 
@@ -928,7 +937,7 @@ def add_marc_source(record, options):
     """
 
     global RECORD_COUNT
+
     source = os.path.basename(options['input'])
 
     marc_source = pymarc.Field(tag = '598',
@@ -966,7 +975,7 @@ def process_urls(field, options, publisher):
             subs = get_subfields(field, data)
             eight_five_six = pymarc.Field(tag = '856',
                 indicators = ['4', '0'],
-                subfields = subs 
+                subfields = subs
             )
             new_fields.append(eight_five_six)
 
@@ -1064,18 +1073,20 @@ def get_subfields(field, data):
     ebrary = False
     if url.find('.ebrary.com') > -1:
         ebrary = True
-        
+
     # ebrary URLs look like: http://site.ebrary.com/lib/<channel>/Doc?id=2001019
     # we need to replace <channel> with the library-specific channel
     if ebrary:
-        ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url) 
+        ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url)
         url = ebrary_url.group(1) + data['ebrary_code'] + ebrary_url.group(2)
 
     # Only Boreal still wants proxied ebrary links
     if ebrary and data['ebrary_code'] != 'ocls':
         subs.extend(['u', url])
     else:
-        if data['ebrary_code'] == 'ocls' and re.search(r'ra.ocls.ca', field['u']):
+        if (data['ebrary_code'] == 'ocls' and 
+            re.search(r'ra.ocls.ca', field['u'])
+        ):
             subs.extend(['u', field['u']])
         else:
             subs.extend(['u', data['proxy'] + field['u']])