"""
import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, urllib2
-from datetime import date
from BeautifulSoup import BeautifulSoup
-RECORD_COUNT = 0
-
class Institution():
"""Defines standard settings for each Conifer institution"""
"proxy": "http://libproxy.auc.ca/login?url=", \
"link_text": "Available online", \
"sfx_url": "http://sfx.scholarsportal.info/algoma", \
- "access_note": "Access restricted to users with a valid Algoma University ID ;" \
+ "access_note": "Access restricted to users with a valid Algoma University ID" \
}
self.laurentian = { \
"proxy": "https://librweb.laurentian.ca/login?url=", \
"link_text": "Available online / disponible en ligne", \
"sfx_url": "http://sfx.scholarsportal.info/laurentian", \
- "access_note": "Access restricted to users with a valid Laurentian University ID ;" \
+ "access_note": "Access restricted to users with a valid Laurentian University ID" \
}
self.windsor = { \
"proxy": "http://ezproxy.uwindsor.ca/login?url=", \
"link_text": "Available online", \
"sfx_url": "http://sfx.scholarsportal.info/windsor", \
- "access_note": "Access restricted to users with a valid University of Windsor ID ;" \
+ "access_note": "Access restricted to users with a valid University of Windsor ID" \
}
def get_settings(self, lib):
-c / --consortium : The name of the consortial license to be inserted in
each 506$b access restriction note.
- -p / --publisher : The name of the publisher to be inserted in a 710 field
- with a subfield 4 relator code 'pbl'.
-
- -P / --platform: The name of the digital platform to be inserted in a 710
- field.
+ -p / --publisher : The name of the publisher to be inserted in a 710 field.
-A / --algoma: Add an 856 for Algoma University
_options['--consortium'] = val
elif key == '-p':
_options['--publisher'] = val
- elif key == '-P':
- _options['--platform'] = val
elif key == '-n':
_options['--note'] = val
elif key == '-A':
if '--note' in options:
clean_opts['note'] = options['--note']
- if '--platform' in options:
- clean_opts['platform'] = options['--platform']
-
clean_opts['libraries'] = _libraries
clean_opts['input'] = _input
clean_opts['output'] = _output
def parse_opts():
"""Get command-line arguments from the script"""
try:
- _short_opts = 'i:o:a:c:p:ALWn:P:s:h'
+ _short_opts = 'i:o:a:c:p:ALWn:s:h'
_long_opts = ['input=', 'output=', 'authorization=', 'consortium=',
'publisher=', 'algoma', 'laurentian', 'windsor', 'note=',
- 'platform=', 'sample=', 'help'
+ 'sample=', 'help'
]
opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
except getopt.GetoptError, ex:
def process_records(options):
"""Converts raw ebook MARC records to Conifer-ready MARC records"""
- global RECORD_COUNT
sample = ''
reader = pymarc.MARCReader(
open(options['input'], mode='rb'), to_unicode=True
if ('sample' in options):
sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
+ cnt = 0
for record in reader:
- RECORD_COUNT += 1
+ cnt = cnt + 1
try:
if not (record['856'] and record['856']['u']):
- print("* No 856 for record # %d in file %s"
- % (RECORD_COUNT, options['input'])
+ print("* No 856 for record # %s in file %s"
+ % (cnt, options['input'])
)
new_record = process_fields(record, options)
writer.write(new_record)
- if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))):
+ if (sample and ((cnt == 1) or (cnt % 100 == 0))):
sample.write(new_record)
except Exception, ex:
- print("* Error processing record %d - %s" % (RECORD_COUNT, ex))
+ print("* Error processing record %s - %s" % (cnt, ex))
def process_fields(record, options):
"""Decide which fields to add, delete, and keep"""
new_record = pymarc.Record(to_unicode=True, force_utf8=True)
- add_cat_source(new_record, options) # 040
- add_restriction(record, options) # 506
-
- # 590
- if 'note' in options:
- note = pymarc.Field(tag = '590',
- indicators = [' ', ' '],
- subfields = [
- 'a', options['note']
- ]
- )
- record.add_field(note)
-
- add_marc_source(record, options) # 598
- publisher = add_publisher(record, options) # 710
- add_platform(record, options) # 710
-
- marked_isbn = mark_isbn_for_sfx(record, options)
-
for field in record.get_fields():
# Process all of the 856 fields
if field.tag == '856':
- new_fields = process_urls(field, options, publisher)
+ new_fields = process_urls(field, options)
if new_fields:
for new_856 in new_fields:
new_record.add_field(new_856)
else:
new_record.add_field(field)
+ add_publisher(record, new_record, options)
+ add_restriction(new_record, options)
+ marked_isbn = mark_isbn_for_sfx(new_record, options)
if not marked_isbn:
- try:
- isbn = record['020']['a']
- print("ISBN: [%s] - no matching ISBN target found in SFX for %s" %
- (isbn, new_record['856']['u'])
- )
- except:
- print("No matching ISBN target found in SFX for %s" %
- (new_record['856']['u'])
- )
+ print("No matching ISBN target found in SFX for %s" %
+ (new_record['856']['u'])
+ )
+
+ if 'note' in options:
+ note = pymarc.Field(tag = '590',
+ indicators = [' ', ' '],
+ subfields = [
+ 'a', options['note']
+ ]
+ )
+ new_record.add_field(note)
+
+ add_cat_source(new_record, options)
return new_record
-def add_publisher(record, options):
+def add_publisher(record, new_record, options):
"""
This is a convoluted way to avoid creating a new 710 if we already
have a matching 710 and just need to add the publisher relator code.
"""
- publisher = options['publisher']
munge_publisher = False
need_publisher = True
need_relator = True
- raw_publisher = None
- try:
- raw_publisher = record['260']['b']
- except:
- pass
-
- if raw_publisher:
- if 'Oxford' in raw_publisher or 'Clarendon' in raw_publisher:
- publisher = 'Oxford University Press'
- elif 'Cambridge' in raw_publisher:
- publisher = 'Cambridge University Press'
-
# Iterate through all of the existing 710 fields
for sten in record.get_fields('710'):
for pub in sten.get_subfields('a'):
- if pub == publisher:
+ if pub == options['publisher']:
munge_publisher = True
for rel in sten.get_subfields('4'):
if rel == 'pbl':
seven_ten = pymarc.Field(tag = '710',
indicators = ['2', ' '],
subfields = [
- 'a', publisher,
+ 'a', options['publisher'],
'4', 'pbl'
]
)
- record.add_field(seven_ten)
-
- return publisher
-
-def add_platform(record, options):
- """
- This is a convoluted way to avoid creating a new 710 if we already
- have a matching 710 for digital platform.
- """
-
- platform = options['platform']
- need_platform = True
-
- # Iterate through all of the existing 710 fields
- for sten in record.get_fields('710'):
- for pub in sten.get_subfields('a'):
- if pub == platform:
- need_platform = False
-
- if need_platform:
- # Add the platform
- seven_ten = pymarc.Field(tag = '710',
- indicators = ['2', ' '],
- subfields = [
- 'a', platform
- ]
- )
- record.add_field(seven_ten)
+ new_record.add_field(seven_ten)
def mark_isbn_for_sfx(record, options):
"""
"""
# For every ISBN in the record
- for isbn in record.get_fields('020'):
- for isbnval in isbn.get_subfields('a'):
+ for isbn in record.get_fields('020', '024'):
+ for isbnval in isbn.get_subfields('a', 'z'):
isbnval = clean_isbn(isbnval)
# And for every library we have enabled
for lib in options['libraries']:
- sfx = options['settings'].get_settings(lib)['sfx_url']
- url = "%s?url_ver=Z39.88-2004&url_ctx_fmt=infofi/fmt:kev:mtx:ctx&" \
- "ctx_enc=UTF-8&ctx_ver=Z39.88-2004&rfr_id=info:sid/evergreen&" \
- "sfx.ignore_date_threshold=1&" \
- "sfx.response_type=multi_obj_detailed_xml" \
- "&__service_type=getFullTxt&rft.isbn=%s" % (sfx, isbnval)
-
- try:
- req = urllib2.urlopen(url)
- sfx_res = BeautifulSoup(req.read())
- except urllib2.HTTPError, ex:
- print("%s for URL %s" % (ex, url))
- continue
- except urllib2.URLError, ex:
- print("%s for URL %s" % (ex, url))
- continue
-
- # We want a target with a service_type element of 'getFullTxt'
- targets = sfx_res.ctx_obj.ctx_obj_targets.findAll(
- 'target', recursive=False
- )
+ found = check_for_isbn(options, lib, isbnval)
+ if found:
+ # Add the $9 subfield to mark this as a good one
+ isbn.add_subfield('9', 'SFX')
+ return True
+ return False
+
+def check_for_isbn(options, lib, isbnval):
+ """
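+ Given an ISBN value, check SFX at the specified library for a match.
+ Returns True if SFX lists a target whose service_type is 'getFullTxt';
+ returns False if there is no such target or the SFX request fails.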
+ """
+ sfx = options['settings'].get_settings(lib)['sfx_url']
+ url = "%s?url_ver=Z39.88-2004&url_ctx_fmt=infofi/fmt:kev:mtx:ctx&" \
+ "ctx_enc=UTF-8&ctx_ver=Z39.88-2004&rfr_id=info:sid/evergreen&" \
+ "sfx.ignore_date_threshold=1&" \
+ "sfx.response_type=multi_obj_detailed_xml" \
+ "&__service_type=getFullTxt&rft.isbn=%s" % (sfx, isbnval)
+
+ try:
+ req = urllib2.urlopen(url)
+ sfx_res = BeautifulSoup(req.read())
+ except urllib2.HTTPError, ex:
+ print("%s for URL %s" % (ex, url))
+ return False
+ except urllib2.URLError, ex:
+ print("%s for URL %s" % (ex, url))
+ return False
- if len(targets) == 0:
- # No SFX targets found for this ISBN - next!
- continue
+ # We want a target with a service_type element of 'getFullTxt'
+ targets = sfx_res.ctx_obj.ctx_obj_targets.findAll(
+ 'target', recursive=False
+ )
+
+ if len(targets) == 0:
+ # No SFX targets found for this ISBN, so there is no match
+ return False
+
+ for target in targets:
+ if target.service_type.renderContents() == 'getFullTxt':
+ return True
- for target in targets:
- if target.service_type.renderContents() == 'getFullTxt':
- # Add the $9 subfield to mark this as a good one
- isbn.add_subfield('9', 'SFX')
- return True
return False
def clean_isbn(isbn):
* $9 - Institutional code to which this note applies
"""
- # Add a period if the authorization ends with a number or letter
- authnote = options['authorization']
- if authnote[-1] not in '.)]':
- authnote += '.'
-
for library in options['libraries']:
libopts = options['settings'].get_settings(library)
# Add the access restriction note
indicators = ['1', ' '],
subfields = [
'a', libopts['access_note'],
- 'b', options['consortium'] + ' ; ',
- 'e', authnote,
+ 'b', options['consortium'],
+ 'e', options['authorization'],
'9', libopts['code']
]
)
)
record.add_field(forty)
-def add_marc_source(record, options):
- """
- Add a 598 field identifying the source MARC file name and processing date
- """
-
- global RECORD_COUNT
- source = os.path.basename(options['input'])
-
- marc_source = pymarc.Field(tag = '598',
- indicators = [' ', ' '],
- subfields = [
- 'a', source,
- 'b', date.today().isoformat(),
- 'c', str(RECORD_COUNT)
- ]
- )
- record.add_field(marc_source)
-
-def process_urls(field, options, publisher):
+def process_urls(field, options):
"""Creates 856 fields required by Conifer"""
new_fields = []
new_fields.append(enrich)
else:
for lib in options['libraries']:
-
- # Tweak for Algoma for combined CUP/OUP
- if lib == 'algoma' and 'Cambridge' in publisher:
- continue
data = options['settings'].get_settings(lib)
subs = get_subfields(field, data)
eight_five_six = pymarc.Field(tag = '856',
if __name__ == '__main__':
-
process_records(parse_opts())