From: Dan Scott Date: Thu, 28 Jul 2011 15:53:27 +0000 (-0400) Subject: Revert inadvertent damage caused by last commit X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=5823eb9c54caa545d32b7c2ba9526e88c5e2227b;p=contrib%2FConifer.git Revert inadvertent damage caused by last commit Two different copies of the Conifer repo = damage! Signed-off-by: Dan Scott --- diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py index e753c9c2ff..cefe047e68 100644 --- a/tools/ebooks/prep_ebook_records.py +++ b/tools/ebooks/prep_ebook_records.py @@ -14,8 +14,11 @@ be accommodated in batch load. """ import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, urllib2 +from datetime import date from BeautifulSoup import BeautifulSoup +RECORD_COUNT = 0 + class Institution(): """Defines standard settings for each Conifer institution""" @@ -27,7 +30,7 @@ class Institution(): "proxy": "http://libproxy.auc.ca/login?url=", \ "link_text": "Available online", \ "sfx_url": "http://sfx.scholarsportal.info/algoma", \ - "access_note": "Access restricted to users with a valid Algoma University ID" \ + "access_note": "Access restricted to users with a valid Algoma University ID ;" \ } self.laurentian = { \ @@ -36,7 +39,7 @@ class Institution(): "proxy": "https://librweb.laurentian.ca/login?url=", \ "link_text": "Available online / disponible en ligne", \ "sfx_url": "http://sfx.scholarsportal.info/laurentian", \ - "access_note": "Access restricted to users with a valid Laurentian University ID" \ + "access_note": "Access restricted to users with a valid Laurentian University ID ;" \ } self.windsor = { \ @@ -45,7 +48,7 @@ class Institution(): "proxy": "http://ezproxy.uwindsor.ca/login?url=", \ "link_text": "Available online", \ "sfx_url": "http://sfx.scholarsportal.info/windsor", \ - "access_note": "Access restricted to users with a valid University of Windsor ID" \ + "access_note": "Access restricted to users with a valid University of Windsor ID ;" \ } def get_settings(self, lib): @@ -92,8 +95,12 @@ Required arguments: -c / --consortium : The name of the consortial license to be inserted in each 506$b access restriction note. - -p / --publisher : The name of the publisher to be inserted in a 710 field. + -p / --publisher : The name of the publisher to be inserted in a 710 field + with a subfield 4 relator code 'pbl'. + -P / --platform: The name of the digital platform to be inserted in a 710 + field. + -A / --algoma: Add an 856 for Algoma University -L / --laurentian: Add an 856 for Laurentian University @@ -129,6 +136,8 @@ def consolidate_options(opts): _options['--consortium'] = val elif key == '-p': _options['--publisher'] = val + elif key == '-P': + _options['--platform'] = val elif key == '-n': _options['--note'] = val elif key == '-A': @@ -206,6 +215,9 @@ def check_options(options): if '--note' in options: clean_opts['note'] = options['--note'] + if '--platform' in options: + clean_opts['platform'] = options['--platform'] + clean_opts['libraries'] = _libraries clean_opts['input'] = _input clean_opts['output'] = _output @@ -232,10 +244,10 @@ def check_libraries(options): def parse_opts(): """Get command-line arguments from the script""" try: - _short_opts = 'i:o:a:c:p:ALWn:s:h' + _short_opts = 'i:o:a:c:p:ALWn:P:s:h' _long_opts = ['input=', 'output=', 'authorization=', 'consortium=', 'publisher=', 'algoma', 'laurentian', 'windsor', 'note=', - 'sample=', 'help' + 'platform=', 'sample=', 'help' ] opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) except getopt.GetoptError, ex: @@ -248,6 +260,7 @@ def parse_opts(): def process_records(options): """Converts raw ebook MARC records to Conifer-ready MARC records""" + global RECORD_COUNT sample = '' reader = pymarc.MARCReader( open(options['input'], mode='rb'), to_unicode=True @@ -256,32 +269,50 @@ def process_records(options): if ('sample' in options): sample = pymarc.MARCWriter(open(options['sample'], mode='wb')) - cnt = 0 for record in reader: - cnt = cnt + 1 + RECORD_COUNT += 1 try: if not (record['856'] and record['856']['u']): print("* No 856 for record # %s in file %s" - % (cnt, options['input']) + % (RECORD_COUNT, options['input']) ) new_record = process_fields(record, options) writer.write(new_record) - if (sample and ((cnt == 1) or (cnt % 100 == 0))): + if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))): sample.write(new_record) except Exception, ex: - print("* Error processing record %s - %s" % (cnt, ex)) + print("* Error processing record %d - %s" % (RECORD_COUNT, ex)) def process_fields(record, options): """Decide which fields to add, delete, and keep""" new_record = pymarc.Record(to_unicode=True, force_utf8=True) + add_cat_source(new_record, options) # 040 + add_restriction(record, options) # 506 + + # 590 + if 'note' in options: + note = pymarc.Field(tag = '590', + indicators = [' ', ' '], + subfields = [ + 'a', options['note'] + ] + ) + record.add_field(note) + + add_marc_source(record, options) # 598 + publisher = add_publisher(record, options) # 710 + add_platform(record, options) # 710 + + marked_isbn = mark_isbn_for_sfx(record, options) + for field in record.get_fields(): # Process all of the 856 fields if field.tag == '856': - new_fields = process_urls(field, options) + new_fields = process_urls(field, options, publisher) if new_fields: for new_856 in new_fields: new_record.add_field(new_856) @@ -294,41 +325,46 @@ def process_fields(record, options): else: new_record.add_field(field) - add_publisher(record, new_record, options) - add_restriction(new_record, options) - marked_isbn = mark_isbn_for_sfx(new_record, options) if not marked_isbn: - print("No matching ISBN target found in SFX for %s" % - (new_record['856']['u']) - ) - - if 'note' in options: - note = pymarc.Field(tag = '590', - indicators = [' ', ' '], - subfields = [ - 'a', options['note'] - ] - ) - new_record.add_field(note) - - add_cat_source(new_record, options) + try: + isbn = record['020']['a'] + print("ISBN: [%s] - no matching ISBN target found in SFX for %s" % + (isbn, new_record['856']['u']) + ) + except: + print("No matching ISBN target found in SFX for %s" % + (new_record['856']['u']) + ) return new_record -def add_publisher(record, new_record, options): +def add_publisher(record, options): """ This is a convoluted way to avoid creating a new 710 if we already have a matching 710 and just need to add the publisher relator code. """ + publisher = options['publisher'] munge_publisher = False need_publisher = True need_relator = True + raw_publisher = None + try: + raw_publisher = record['260']['b'] + except: + pass + + if raw_publisher: + if 'Oxford' in raw_publisher or 'Clarendon' in raw_publisher: + publisher = 'Oxford University Press' + elif 'Cambridge' in raw_publisher: + publisher = 'Cambridge University Press' + # Iterate through all of the existing 710 fields for sten in record.get_fields('710'): for pub in sten.get_subfields('a'): - if pub == options['publisher']: + if pub == publisher: munge_publisher = True for rel in sten.get_subfields('4'): if rel == 'pbl': @@ -344,12 +380,39 @@ def add_publisher(record, new_record, options): seven_ten = pymarc.Field(tag = '710', indicators = ['2', ' '], subfields = [ - 'a', options['publisher'], + 'a', publisher, '4', 'pbl' ] ) - new_record.add_field(seven_ten) + record.add_field(seven_ten) + + return publisher + +def add_platform(record, options): + """ + This is a convoluted way to avoid creating a new 710 if we already + have a matching 710 for digital platform. + """ + + platform = options['platform'] + need_platform = True + # Iterate through all of the existing 710 fields + for sten in record.get_fields('710'): + for pub in sten.get_subfields('a'): + if pub == platform: + need_platform = False + + if need_platform: + # Add the platform + seven_ten = pymarc.Field(tag = '710', + indicators = ['2', ' '], + subfields = [ + 'a', platform + ] + ) + record.add_field(seven_ten) + def mark_isbn_for_sfx(record, options): """ Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups @@ -441,6 +504,11 @@ def add_restriction(new_record, options): * $9 - Institutional code to which this note applies """ + # Add a period if the authorization ends with a number or letter + authnote = options['authorization'] + if authnote[-1] not in '.)]': + authnote += '.' + for library in options['libraries']: libopts = options['settings'].get_settings(library) # Add the access restriction note @@ -448,8 +516,8 @@ def add_restriction(new_record, options): indicators = ['1', ' '], subfields = [ 'a', libopts['access_note'], - 'b', options['consortium'], - 'e', options['authorization'], + 'b', options['consortium'] + ' ; ', + 'e', authnote, '9', libopts['code'] ] ) @@ -474,8 +542,26 @@ def add_cat_source(record, options): ) record.add_field(forty) +def add_marc_source(record, options): + """ + Add a 598 field identifying the source MARC file name and processing date + """ -def process_urls(field, options): + global RECORD_COUNT + + source = os.path.basename(options['input']) + + marc_source = pymarc.Field(tag = '598', + indicators = [' ', ' '], + subfields = [ + 'a', source, + 'b', date.today().isoformat(), + 'c', str(RECORD_COUNT) + ] + ) + record.add_field(marc_source) + +def process_urls(field, options, publisher): """Creates 856 fields required by Conifer""" new_fields = [] @@ -491,6 +577,11 @@ def process_urls(field, options): new_fields.append(enrich) else: for lib in options['libraries']: + + # Tweak for Algoma for combined CUP/OUP + if lib == 'algoma' and 'Cambridge' in publisher: + continue + data = options['settings'].get_settings(lib) subs = get_subfields(field, data) eight_five_six = pymarc.Field(tag = '856',