"""
import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, urllib2
+from datetime import date
from BeautifulSoup import BeautifulSoup
+RECORD_COUNT = 0
+
class Institution():
"""Defines standard settings for each Conifer institution"""
"proxy": "http://libproxy.auc.ca/login?url=", \
"link_text": "Available online", \
"sfx_url": "http://sfx.scholarsportal.info/algoma", \
- "access_note": "Access restricted to users with a valid Algoma University ID" \
+ "access_note": "Access restricted to users with a valid Algoma University ID ;" \
}
self.laurentian = { \
"proxy": "https://librweb.laurentian.ca/login?url=", \
"link_text": "Available online / disponible en ligne", \
"sfx_url": "http://sfx.scholarsportal.info/laurentian", \
- "access_note": "Access restricted to users with a valid Laurentian University ID" \
+ "access_note": "Access restricted to users with a valid Laurentian University ID ;" \
}
self.windsor = { \
"proxy": "http://ezproxy.uwindsor.ca/login?url=", \
"link_text": "Available online", \
"sfx_url": "http://sfx.scholarsportal.info/windsor", \
- "access_note": "Access restricted to users with a valid University of Windsor ID" \
+ "access_note": "Access restricted to users with a valid University of Windsor ID ;" \
}
def get_settings(self, lib):
-c / --consortium : The name of the consortial license to be inserted in
each 506$b access restriction note.
- -p / --publisher : The name of the publisher to be inserted in a 710 field.
+ -p / --publisher : The name of the publisher to be inserted in a 710 field
+ with a subfield 4 relator code 'pbl'.
+ -P / --platform: The name of the digital platform to be inserted in a 710
+ field.
+
-A / --algoma: Add an 856 for Algoma University
-L / --laurentian: Add an 856 for Laurentian University
_options['--consortium'] = val
elif key == '-p':
_options['--publisher'] = val
+ elif key == '-P':
+ _options['--platform'] = val
elif key == '-n':
_options['--note'] = val
elif key == '-A':
if '--note' in options:
clean_opts['note'] = options['--note']
+ if '--platform' in options:
+ clean_opts['platform'] = options['--platform']
+
clean_opts['libraries'] = _libraries
clean_opts['input'] = _input
clean_opts['output'] = _output
def parse_opts():
"""Get command-line arguments from the script"""
try:
- _short_opts = 'i:o:a:c:p:ALWn:s:h'
+ _short_opts = 'i:o:a:c:p:ALWn:P:s:h'
_long_opts = ['input=', 'output=', 'authorization=', 'consortium=',
'publisher=', 'algoma', 'laurentian', 'windsor', 'note=',
- 'sample=', 'help'
+ 'platform=', 'sample=', 'help'
]
opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
except getopt.GetoptError, ex:
def process_records(options):
"""Converts raw ebook MARC records to Conifer-ready MARC records"""
+ global RECORD_COUNT
sample = ''
reader = pymarc.MARCReader(
open(options['input'], mode='rb'), to_unicode=True
if ('sample' in options):
sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
- cnt = 0
for record in reader:
- cnt = cnt + 1
+ RECORD_COUNT += 1
try:
if not (record['856'] and record['856']['u']):
print("* No 856 for record # %s in file %s"
- % (cnt, options['input'])
+ % (RECORD_COUNT, options['input'])
)
new_record = process_fields(record, options)
writer.write(new_record)
- if (sample and ((cnt == 1) or (cnt % 100 == 0))):
+ if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))):
sample.write(new_record)
except Exception, ex:
- print("* Error processing record %s - %s" % (cnt, ex))
+ print("* Error processing record %d - %s" % (RECORD_COUNT, ex))
def process_fields(record, options):
"""Decide which fields to add, delete, and keep"""
new_record = pymarc.Record(to_unicode=True, force_utf8=True)
+ add_cat_source(new_record, options) # 040
+ add_restriction(record, options) # 506
+
+ # 590
+ if 'note' in options:
+ note = pymarc.Field(tag = '590',
+ indicators = [' ', ' '],
+ subfields = [
+ 'a', options['note']
+ ]
+ )
+ record.add_field(note)
+
+ add_marc_source(record, options) # 598
+ publisher = add_publisher(record, options) # 710
+ add_platform(record, options) # 710
+
+ marked_isbn = mark_isbn_for_sfx(record, options)
+
for field in record.get_fields():
# Process all of the 856 fields
if field.tag == '856':
- new_fields = process_urls(field, options)
+ new_fields = process_urls(field, options, publisher)
if new_fields:
for new_856 in new_fields:
new_record.add_field(new_856)
else:
new_record.add_field(field)
- add_publisher(record, new_record, options)
- add_restriction(new_record, options)
- marked_isbn = mark_isbn_for_sfx(new_record, options)
if not marked_isbn:
- print("No matching ISBN target found in SFX for %s" %
- (new_record['856']['u'])
- )
-
- if 'note' in options:
- note = pymarc.Field(tag = '590',
- indicators = [' ', ' '],
- subfields = [
- 'a', options['note']
- ]
- )
- new_record.add_field(note)
-
- add_cat_source(new_record, options)
+ try:
+ isbn = record['020']['a']
+ print("ISBN: [%s] - no matching ISBN target found in SFX for %s" %
+ (isbn, new_record['856']['u'])
+ )
+ except:
+ print("No matching ISBN target found in SFX for %s" %
+ (new_record['856']['u'])
+ )
return new_record
-def add_publisher(record, new_record, options):
+def add_publisher(record, options):
"""
This is a convoluted way to avoid creating a new 710 if we already
have a matching 710 and just need to add the publisher relator code.
"""
+ publisher = options['publisher']
munge_publisher = False
need_publisher = True
need_relator = True
+ raw_publisher = None
+ try:
+ raw_publisher = record['260']['b']
+ except:
+ pass
+
+ if raw_publisher:
+ if 'Oxford' in raw_publisher or 'Clarendon' in raw_publisher:
+ publisher = 'Oxford University Press'
+ elif 'Cambridge' in raw_publisher:
+ publisher = 'Cambridge University Press'
+
# Iterate through all of the existing 710 fields
for sten in record.get_fields('710'):
for pub in sten.get_subfields('a'):
- if pub == options['publisher']:
+ if pub == publisher:
munge_publisher = True
for rel in sten.get_subfields('4'):
if rel == 'pbl':
seven_ten = pymarc.Field(tag = '710',
indicators = ['2', ' '],
subfields = [
- 'a', options['publisher'],
+ 'a', publisher,
'4', 'pbl'
]
)
- new_record.add_field(seven_ten)
+ record.add_field(seven_ten)
+
+ return publisher
+
def add_platform(record, options):
    """
    Ensure the record carries a 710 field naming the digital platform.

    The platform name is read from options['platform']. That key is
    optional: when it is missing or empty the record is left untouched.
    Otherwise the $a subfields of every existing 710 are compared with the
    platform name, and a new 710 (indicators '2', ' ') holding the platform
    in $a is added only if none of them matches exactly.

    The record is modified in place; nothing is returned.
    """

    # Indexing options['platform'] directly raised KeyError for every record
    # whenever no platform was supplied, so treat the setting as optional.
    platform = options.get('platform')
    if not platform:
        return

    # Nothing to do if any existing 710 already names the platform in $a
    for sten in record.get_fields('710'):
        if platform in sten.get_subfields('a'):
            return

    # Add the platform
    seven_ten = pymarc.Field(tag = '710',
        indicators = ['2', ' '],
        subfields = [
            'a', platform
        ]
    )
    record.add_field(seven_ten)
+
def mark_isbn_for_sfx(record, options):
"""
Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups
* $9 - Institutional code to which this note applies
"""
+ # Add a period if the authorization ends with a number or letter
+ authnote = options['authorization']
+ if authnote[-1] not in '.)]':
+ authnote += '.'
+
for library in options['libraries']:
libopts = options['settings'].get_settings(library)
# Add the access restriction note
indicators = ['1', ' '],
subfields = [
'a', libopts['access_note'],
- 'b', options['consortium'],
- 'e', options['authorization'],
+ 'b', options['consortium'] + ' ; ',
+ 'e', authnote,
'9', libopts['code']
]
)
)
record.add_field(forty)
def add_marc_source(record, options):
    """
    Stamp the record with a 598 field describing where it came from

    * $a - base name of the input MARC file (options['input'])
    * $b - today's date in ISO format
    * $c - current value of the module-level RECORD_COUNT, as a string
    """

    provenance = [
        'a', os.path.basename(options['input']),
        'b', date.today().isoformat(),
        'c', str(RECORD_COUNT),
    ]
    record.add_field(
        pymarc.Field(tag='598', indicators=[' ', ' '], subfields=provenance)
    )
+
+def process_urls(field, options, publisher):
"""Creates 856 fields required by Conifer"""
new_fields = []
new_fields.append(enrich)
else:
for lib in options['libraries']:
+
+ # Tweak for Algoma for combined CUP/OUP
+ if lib == 'algoma' and 'Cambridge' in publisher:
+ continue
+
data = options['settings'].get_settings(lib)
subs = get_subfields(field, data)
eight_five_six = pymarc.Field(tag = '856',