From: dbs Date: Fri, 3 Dec 2010 15:08:13 +0000 (+0000) Subject: Strip 300 fields that are just placeholders, refactor lengthy functions X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=947e5dbaa12d6cbec49b0419da0ef352d2b4293e;p=contrib%2FConifer.git Strip 300 fields that are just placeholders, refactor lengthy functions git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/branches/rel_1_6_1@1099 6d9bc8c9-1ec2-4278-b937-99fde70a366f --- diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py index d459957d0a..2c542f6eab 100644 --- a/tools/ebooks/prep_ebook_records.py +++ b/tools/ebooks/prep_ebook_records.py @@ -146,16 +146,7 @@ def check_options(options): print "* Missing -p / --publisher argument!" _help = True - _libraries = dict() - if '--algoma' in options: - _libraries['algoma'] = True - - if '--laurentian' in options: - _libraries['laurentian'] = True - - if '--windsor' in options: - _libraries['windsor'] = True - + _libraries = check_libraries(options) if len(_libraries.keys()) == 0: _help = True @@ -194,6 +185,22 @@ def check_options(options): return clean_opts +def check_libraries(options): + """Build a dict of the libraries that were requested for this batch""" + + _libraries = dict() + if '--algoma' in options: + _libraries['algoma'] = True + + if '--laurentian' in options: + _libraries['laurentian'] = True + + if '--windsor' in options: + _libraries['windsor'] = True + + return _libraries + + def parse_opts(): """Get command-line arguments from the script""" try: @@ -219,51 +226,64 @@ def process_records(options): cnt = 0 for record in reader: - url = False cnt = cnt + 1 if not (record['856'] and record['856']['u']): - print("* No 856 for record # %s in file %s" % (cnt, options['input'])) - - new_record = pymarc.Record() - for field in record.get_fields(): - # Only process the first 856 field, for better or worse - if field.tag == '856': - if url == False: - url = True - new_fields = process_urls(field, options) - for new_856 in new_fields: - new_record.add_field(new_856) - # Strip out 9xx fields - elif field.tag[0] == '9': - pass - else: - new_record.add_field(field) - - # Add the publisher, with relator code - seven_ten = pymarc.Field(tag = '710', - indicators = ['2', ' '], - subfields = [ - 'a', options['publisher'], - '4', 'pbl' - ] - ) - new_record.add_field(seven_ten) - - if 'note' in options: - note = pymarc.Field(tag = '590', - indicators = [' ', ' '], - subfields = [ - 'a', options['note'] - ] + print("* No 856 for record # %s in file %s" + % (cnt, options['input']) ) - new_record.add_field(note) - add_cat_source(new_record, options) + new_record = process_fields(record, options) writer.write(new_record) if (sample and ((cnt == 1) or (cnt % 100 == 0))): sample.write(new_record) +def process_fields(record, options): + """Decide which fields to add, delete, and keep""" + + url = False + new_record = pymarc.Record() + + for field in record.get_fields(): + # Only process the first 856 field, for better or worse + if field.tag == '856': + if url == False: + url = True + new_fields = process_urls(field, options) + for new_856 in new_fields: + new_record.add_field(new_856) + # Strip out 9xx fields: we don't want local fields in our records + elif field.tag[0] == '9': + pass + # Strip out 300 fields that only contain placeholders + elif field.tag == '300' and field['a'] == 'p. cm.': + pass + else: + new_record.add_field(field) + + # Add the publisher, with relator code + seven_ten = pymarc.Field(tag = '710', + indicators = ['2', ' '], + subfields = [ + 'a', options['publisher'], + '4', 'pbl' + ] + ) + new_record.add_field(seven_ten) + + if 'note' in options: + note = pymarc.Field(tag = '590', + indicators = [' ', ' '], + subfields = [ + 'a', options['note'] + ] + ) + new_record.add_field(note) + + add_cat_source(new_record, options) + + return new_record + def add_cat_source(record, options): """Add or extend the 040 field to identify the cataloguing source"""