Add --sample option for generating sample output records

author dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Thu, 2 Dec 2010 15:55:57 +0000 (15:55 +0000)
committer dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Thu, 2 Dec 2010 15:55:57 +0000 (15:55 +0000)
diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py

index baf051f..82b5aa2 100644 (file)
--- a/tools/ebooks/prep_ebook_records.py
+++ b/tools/ebooks/prep_ebook_records.py
@@ -57,8 +57,8 @@ def do_help():
      print '''
  Conifer ebook MARCXML processor
  
-This script takes a set of MARCXML records and processes them to generate a set
-of MARCXML records ready for loading into the Conifer consortial library
+This script takes a set of MARC records and processes them to generate a set
+of MARC records ready for loading into the Conifer consortial library
  system. The processing consists of taking the existing 856 field and creating
  one or more new 856 fields for each Conifer institution that should have access
  to these resources.
@@ -77,9 +77,9 @@ The script customizes the following aspects of each record:
      line.
  
  Required arguments:
-    -i / --input : The name of the input MARCXML file.
+    -i / --input : The name of the input MARC file.
  
-    -o / --output : The name of the output MARCXML file.
+    -o / --output : The name of the output MARC file.
  
      -p / --publisher : The name of the publisher to be inserted in a 710 field.
  
@@ -92,6 +92,9 @@ Required arguments:
  Optional arguments:
      -n / --note : The text of the internal note to be inserted into a 590 field.
  
+    -s / --sample : The name of the sample output MARC file (generates
+                    1 sample record for every 100 records processed)
+
      -h / --help : Prints help message
  
  Examples:
@@ -119,6 +122,8 @@ def consolidate_options(opts):
              _options['--laurentian'] = val
          elif key == '-W':
              _options['--windsor'] = val
+        elif key == '-s':
+            _options['--sample'] = val
          elif key == '-h':
              _options['--help'] = val
  
@@ -179,6 +184,9 @@ def check_options(options):
      clean_opts = dict()
      clean_opts['publisher'] = options['--publisher']
  
+    if '--sample' in options:
+        clean_opts['sample'] = options['--sample']
+
      if '--note' in options:
          clean_opts['note'] = options['--note']
  
@@ -192,9 +200,9 @@ def check_options(options):
  def parse_opts():
      """Get command-line arguments from the script"""
      try:
-        _short_opts = 'i:o:p:ALWn:h'
+        _short_opts = 'i:o:p:ALWn:s:h'
          _long_opts = ['input=', 'output=', 'publisher=', 'algoma', \
-                'laurentian', 'windsor', 'note=', 'help']
+                'laurentian', 'windsor', 'note=', 'sample=', 'help']
          opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts) 
      except getopt.GetoptError, ex:
          print "* %s" % str(ex)
@@ -206,15 +214,18 @@ def parse_opts():
  def process_records(options):
      """Converts raw ebook MARC records to Conifer-ready MARC records"""
  
+    sample = ''
      reader = pymarc.MARCReader(open(options['input'], 'rb'))
      writer = pymarc.MARCWriter(open(options['output'], 'wb'))
+    if ('sample' in options):
+        sample = pymarc.MARCWriter(open(options['sample'], 'wb'))
  
      cnt = 0
      for record in reader:
          url = False
          cnt = cnt + 1
          if not (record['856'] and record['856']['u']):
-            print("* No 856 for record # %s" % (cnt))
+            print("* No 856 for record # %s in file %s" % (cnt, options['input']))
  
          new_record = pymarc.Record()
          for field in record.get_fields():
@@ -228,10 +239,12 @@ def process_records(options):
              else:
                  new_record.add_field(field)
  
+        # Add the publisher, with relator code
          seven_ten = pymarc.Field(tag = '710',
              indicators = ['2', ' '],
              subfields = [
-                'a', options['publisher'] + '(Firm)'
+                'a', options['publisher'] + ' (Firm)',
+                '4', 'pbl'
              ]
          )
          new_record.add_field(seven_ten)
@@ -248,6 +261,9 @@ def process_records(options):
          add_cat_source(new_record, options)
  
          writer.write(new_record)
+        if (sample and ((cnt == 1) or (cnt % 100 == 0))):
+            print("SHOULD BE")
+            sample.write(new_record)
  
  def add_cat_source(record, options):
      """Add or extend the 040 field to identify the cataloguing source"""
author	dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
	Thu, 2 Dec 2010 15:55:57 +0000 (15:55 +0000)
committer	dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
	Thu, 2 Dec 2010 15:55:57 +0000 (15:55 +0000)