Strip 300 fields that are just placeholders, refactor lengthy functions
author dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Fri, 3 Dec 2010 15:08:13 +0000 (15:08 +0000)
committer dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Fri, 3 Dec 2010 15:08:13 +0000 (15:08 +0000)
git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/branches/rel_1_6_1@1099 6d9bc8c9-1ec2-4278-b937-99fde70a366f

tools/ebooks/prep_ebook_records.py

index d459957..2c542f6 100644 (file)
@@ -146,16 +146,7 @@ def check_options(options):
         print "* Missing -p / --publisher argument!"
         _help = True
 
-    _libraries = dict() 
-    if '--algoma' in options:
-        _libraries['algoma'] = True
-
-    if '--laurentian' in options:
-        _libraries['laurentian'] = True
-
-    if '--windsor' in options:
-        _libraries['windsor'] = True
-
+    _libraries = check_libraries(options)
     if len(_libraries.keys()) == 0:
         _help = True
 
@@ -194,6 +185,22 @@ def check_options(options):
 
     return clean_opts
 
+def check_libraries(options):
+    """Return a dict with a True entry for each library whose --<name> flag is in options"""
+    
+    _libraries = dict() 
+    if '--algoma' in options:
+        _libraries['algoma'] = True
+
+    if '--laurentian' in options:
+        _libraries['laurentian'] = True
+
+    if '--windsor' in options:
+        _libraries['windsor'] = True
+
+    return _libraries
+
+
 def parse_opts():
     """Get command-line arguments from the script"""
     try:
@@ -219,51 +226,64 @@ def process_records(options):
 
     cnt = 0
     for record in reader:
-        url = False
         cnt = cnt + 1
         if not (record['856'] and record['856']['u']):
-            print("* No 856 for record # %s in file %s" % (cnt, options['input']))
-
-        new_record = pymarc.Record()
-        for field in record.get_fields():
-            # Only process the first 856 field, for better or worse
-            if field.tag == '856':
-                if url == False:
-                    url = True
-                    new_fields = process_urls(field, options)
-                    for new_856 in new_fields:
-                        new_record.add_field(new_856)
-            # Strip out 9xx fields
-            elif field.tag[0] == '9':
-                pass
-            else:
-                new_record.add_field(field)
-
-        # Add the publisher, with relator code
-        seven_ten = pymarc.Field(tag = '710',
-            indicators = ['2', ' '],
-            subfields = [
-                'a', options['publisher'],
-                '4', 'pbl'
-            ]
-        )
-        new_record.add_field(seven_ten)
-
-        if 'note' in options:
-            note = pymarc.Field(tag = '590',
-                indicators = [' ', ' '],
-                subfields = [
-                    'a', options['note']
-                ]
+            print("* No 856 for record # %s in file %s"
+                    % (cnt, options['input'])
             )
-            new_record.add_field(note)
 
-        add_cat_source(new_record, options)
+        new_record = process_fields(record, options)
 
         writer.write(new_record)
         if (sample and ((cnt == 1) or (cnt % 100 == 0))):
             sample.write(new_record)
 
+def process_fields(record, options):
+    """Return a new record: filtered fields of record plus publisher, note, and cataloguing source"""
+
+    url = False
+    new_record = pymarc.Record()
+
+    for field in record.get_fields():
+        # Only the first 856 goes through process_urls(); later 856 fields are dropped
+        if field.tag == '856':
+            if url == False:
+                url = True
+                new_fields = process_urls(field, options)
+                for new_856 in new_fields:
+                    new_record.add_field(new_856)
+        # Strip out 9xx fields: we don't want local fields in our records
+        elif field.tag[0] == '9':
+            pass
+        # Strip out 300 fields whose subfield a is exactly the placeholder 'p. cm.'
+        elif field.tag == '300' and field['a'] == 'p. cm.':
+            pass
+        else:
+            new_record.add_field(field)
+
+    # Add the publisher as a 710 field, with relator code 'pbl' in subfield 4
+    seven_ten = pymarc.Field(tag = '710',
+        indicators = ['2', ' '],
+        subfields = [
+            'a', options['publisher'],
+            '4', 'pbl'
+        ]
+    )
+    new_record.add_field(seven_ten)
+
+    if 'note' in options:
+        note = pymarc.Field(tag = '590',
+            indicators = [' ', ' '],
+            subfields = [
+                'a', options['note']
+            ]
+        )
+        new_record.add_field(note)
+
+    add_cat_source(new_record, options)
+
+    return new_record
+
 def add_cat_source(record, options):
     """Add or extend the 040 field to identify the cataloguing source"""