From b71fd376970cf596dcb4575dd1ecb5bdc3ca195b Mon Sep 17 00:00:00 2001 From: Dan Scott Date: Thu, 4 Oct 2012 14:45:56 -0400 Subject: [PATCH] Ebooks script code cleanup Lots of accumulated whitespace / line length code convention issues. This makes pylint happier (from 8.65 to 9.44). Signed-off-by: Dan Scott --- tools/ebooks/prep_ebook_records.py | 119 ++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 54 deletions(-) diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py index 7ce07e1406..14de27f0f6 100644 --- a/tools/ebooks/prep_ebook_records.py +++ b/tools/ebooks/prep_ebook_records.py @@ -51,7 +51,7 @@ class Institution(): "link_text": "Disponible en ligne", \ "access_note": u"Accès réservé aux utilisateurs avec un ID valide Collège Boréal ;" \ } - + self.laurentian = { \ "code": "LUSYS", \ "lac_symbol": "OSUL", \ @@ -79,7 +79,6 @@ class Institution(): def get_settings(self, lib): """Return the settings for a library by name""" return getattr(self, lib) - def do_help(): ''' @@ -125,7 +124,7 @@ Required arguments: -P / --platform: The name of the digital platform to be inserted in a 710 field. - + -A / --algoma: Add an 856 for Algoma University -B / --boreal: Add an 856 for College Boreal @@ -265,11 +264,13 @@ def check_options(options): return clean_opts def evergreen_request(method, *args, **kwargs): + """Issue a basic gateway request against Evergreen""" + service = '.'.join(method.split('.')[:2]) kwargs.update({'service':service, 'method':method}) - params = ['%s=%s' % (k,quote(v)) for k,v in kwargs.items()] + params = ['%s=%s' % (k, quote(v)) for k, v in kwargs.items()] params += ['param=%s' % quote(json.dumps(a)) for a in args] - url = '%s?%s' % (GATEWAY_URL, '&'.join(params)) + url = '%s?%s' % (GATEWAY_URL, '&'.join(params)) #print '--->', url req = urllib2.urlopen(url) resp = json.load(req) @@ -280,6 +281,8 @@ def evergreen_request(method, *args, **kwargs): return payload def url_check(record, options): + """Check for a matching URL in Evergreen""" + global DUP_COUNT, RECORD_COUNT match = False @@ -305,13 +308,15 @@ def url_check(record, options): return match_id, match def tcn_check(record): + """Check for a matching TCN in Evergreen""" + global DUP_COUNT, RECORD_COUNT match = False match_id = 0 for tcn in record.get_fields('001'): tcn_val = tcn.value() - tcn_info = evergreen_request(OPENSRF_TCN_CALL,tcn_val) + tcn_info = evergreen_request(OPENSRF_TCN_CALL, tcn_val) bib_ids = tcn_info[0]['ids'] # print "tcn_info", tcn_info for bib_id in bib_ids: @@ -323,6 +328,8 @@ def tcn_check(record): return match_id, match def isbn_check(record): + """Check for a matching ISBN in Evergreen""" + global DUP_COUNT, RECORD_COUNT match = False @@ -330,8 +337,7 @@ def isbn_check(record): for isbn in record.get_fields('020', '024'): for isbnval in isbn.get_subfields('a', 'z'): isbn_val = clean_isbn(isbnval) - isbn_info = evergreen_request(OPENSRF_ISBN_CALL,isbnval) - match_count = isbn_info[0]['count'] + isbn_info = evergreen_request(OPENSRF_ISBN_CALL, isbnval) #print "count", isbn_info[0]['count'] bib_ids = isbn_info[0]['ids'] for bib_id in bib_ids: @@ -354,8 +360,8 @@ def append_period(text): def check_libraries(options): """Build a dict of the libraries that were requested for this batch""" - - _libraries = dict() + + _libraries = dict() for lib in ['algoma', 'boreal', 'laurentian', 'windsor']: if '--' + lib in options: _libraries[lib] = True @@ -367,17 +373,18 @@ def parse_opts(): """Get command-line arguments from the script""" try: _short_opts = 'i:o:a:c:p:P:ABLWe:d:t:u:n:s:h' - _long_opts = ['input=', 'output=', 'authorization=', 'consortium=', - 'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian', 'windsor', 'ebrary', - 'duplicate=', 'tcn=', 'url=', 'note=','sample=', 'help' + _long_opts = ['input=', 'output=', 'authorization=', 'consortium=', + 'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian', + 'windsor', 'ebrary', 'duplicate=', 'tcn=', 'url=', 'note=', + 'sample=', 'help' ] - opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) + opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) except getopt.GetoptError, ex: print "* %s" % str(ex) do_help() _options = consolidate_options(opts[0]) - return check_options(_options) + return check_options(_options) def process_records(options): """Converts raw ebook MARC records to Conifer-ready MARC records""" @@ -400,24 +407,24 @@ def process_records(options): writer = pymarc.MARCWriter(open(options['output'], mode='wb')) except Exception, ex: print("Could not open output file [%s]" % options['output']) - + if 'duplicate' in options: - try: + try: duplicate = pymarc.MARCWriter(open(options['duplicate'], mode='wb')) - except Exception, ex: - print("Could not open output file [%s]" % options['duplicate']) + except Exception, ex: + print("Could not open output file [%s]" % options['duplicate']) if 'tcn' in options: - try: + try: tcn = pymarc.MARCWriter(open(options['tcn'], mode='wb')) - except Exception, ex: - print("Could not open output file [%s]" % options['tcn']) + except Exception, ex: + print("Could not open output file [%s]" % options['tcn']) if 'url' in options: - try: + try: url = pymarc.MARCWriter(open(options['url'], mode='wb')) - except Exception, ex: - print("Could not open output file [%s]" % options['url']) + except Exception, ex: + print("Could not open output file [%s]" % options['url']) if 'sample' in options: sample = pymarc.MARCWriter(open(options['sample'], mode='wb')) @@ -430,36 +437,38 @@ def process_records(options): % (RECORD_COUNT, options['input']) ) else: - print ("%d - %s\n" % (RECORD_COUNT,record['856'])) + print ("%d - %s\n" % (RECORD_COUNT, record['856'])) new_record = '' dup_flag = False if duplicate: - bib_id, dup_flag = isbn_check(record) - new_record = process_fields(record, options, bib_id, dup_flag) - if dup_flag: - duplicate.write(new_record) + bib_id, dup_flag = isbn_check(record) + new_record = process_fields(record, options, bib_id, dup_flag) + if dup_flag: + duplicate.write(new_record) if tcn: - bib_id, dup_flag = tcn_check(record) - new_record = process_fields(record, options, bib_id, dup_flag) - if dup_flag: - tcn.write(new_record) + bib_id, dup_flag = tcn_check(record) + new_record = process_fields(record, options, bib_id, dup_flag) + if dup_flag: + tcn.write(new_record) if url: - bib_id, dup_flag = url_check(record, options) - new_record = process_fields(record, options, bib_id, dup_flag) - if dup_flag: - url.write(new_record) + bib_id, dup_flag = url_check(record, options) + new_record = process_fields(record, options, bib_id, dup_flag) + if dup_flag: + url.write(new_record) if not dup_flag: new_record = process_fields(record, options, 0, False) else: DUP_COUNT += 1 - + if new_record: writer.write(new_record) - if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))): + if (sample and ( + (RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0) + )): sample.write(new_record) except Exception, ex: print("* Error processing record %d - %s" % (RECORD_COUNT, ex)) @@ -484,7 +493,7 @@ def process_fields(record, options, bib_id, dup_flag): record.add_field(note) # 909 - if dup_flag: + if dup_flag: dup_value = bib_id + "" dup = pymarc.Field(tag = '909', indicators = [' ', ' '], @@ -663,7 +672,8 @@ def clean_diacritics(field): global RECORD_COUNT if r'\x' in repr(tmpsf): print " * %d Hex value found in %s:%s - [%s] [%s]" % ( - RECORD_COUNT, field.tag, subfield[0], tmpsf.encode('utf8'), repr(tmpsf) + RECORD_COUNT, field.tag, subfield[0], + tmpsf.encode('utf8'), repr(tmpsf) ) if (repr(subfield[1]) != repr(tmpsf)): @@ -672,7 +682,6 @@ def clean_diacritics(field): ) return new_field - def add_publisher(record, options): """ @@ -703,7 +712,7 @@ def add_publisher(record, options): for pub in sten.get_subfields('a'): if pub == publisher: munge_publisher = True - for rel in sten.get_subfields('4'): + for rel in sten.get_subfields('4'): if rel == 'pbl': need_publisher = False need_relator = False @@ -752,7 +761,7 @@ def add_platform(record, options): ] ) record.add_field(seven_ten) - + def mark_isbn_for_sfx(record, options): """ Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups @@ -784,7 +793,7 @@ def mark_isbn_for_sfx(record, options): indicators = ['8', ' '], subfields = [ 'a', scn.value(), - '9', 'SFX' + '9', 'SFX' ] ) @@ -842,11 +851,11 @@ def clean_isbn(isbn): isbn = isbn.strip() # Grab the first string beginning with a digit - isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn) + isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn) if not isbn_match.group(1): return None - + # Replace hyphens isbn = isbn_match.group(1).replace('-', '') @@ -859,7 +868,7 @@ def add_restriction(new_record, options, publisher): The 506 field includes the following subfields: * $a - Standard text to display * $b - Jurisdiction (identifies the consortial license) - * $e - Authorization (online platform that enforces authorization) + * $e - Authorization (online platform that enforces authorization) * $9 - Institutional code to which this note applies """ @@ -928,7 +937,7 @@ def add_marc_source(record, options): """ global RECORD_COUNT - + source = os.path.basename(options['input']) marc_source = pymarc.Field(tag = '598', @@ -966,7 +975,7 @@ def process_urls(field, options, publisher): subs = get_subfields(field, data) eight_five_six = pymarc.Field(tag = '856', indicators = ['4', '0'], - subfields = subs + subfields = subs ) new_fields.append(eight_five_six) @@ -1064,18 +1073,20 @@ def get_subfields(field, data): ebrary = False if url.find('.ebrary.com') > -1: ebrary = True - + # ebrary URLs look like: http://site.ebrary.com/lib//Doc?id=2001019 # we need to replace with the library-specific channel if ebrary: - ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url) + ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url) url = ebrary_url.group(1) + data['ebrary_code'] + ebrary_url.group(2) # Only Boreal still wants proxied ebrary links if ebrary and data['ebrary_code'] != 'ocls': subs.extend(['u', url]) else: - if data['ebrary_code'] == 'ocls' and re.search(r'ra.ocls.ca', field['u']): + if (data['ebrary_code'] == 'ocls' and + re.search(r'ra.ocls.ca', field['u']) + ): subs.extend(['u', field['u']]) else: subs.extend(['u', data['proxy'] + field['u']]) -- 2.11.0