"ebrary_code": "algomauca", \
"proxy": "http://libproxy.auc.ca/login?url=", \
"link_text": "Available online", \
+ "sfx_url": "http://sfx.scholarsportal.info/algoma", \
"access_note": "Access restricted to users with a valid Algoma University ID" \
}
"ebrary_code": "jndlu", \
"proxy": "https://librweb.laurentian.ca/login?url=", \
"link_text": "Available online / disponible en ligne", \
+ "sfx_url": "http://sfx.scholarsportal.info/laurentian", \
"access_note": "Access restricted to users with a valid Laurentian University ID" \
}
"ebrary_code": "oculwindsor", \
"proxy": "http://ezproxy.uwindsor.ca/login?url=", \
"link_text": "To view Windsor's electronic resource click here.", \
+ "sfx_url": "http://sfx.scholarsportal.info/windsor", \
"access_note": "Access restricted to users with a valid University of Windsor ID" \
}
print "* Missing -c / --consortium argument!"
_help = True
- if '--restriction' not in options:
+ if '--authorization' not in options:
print "* Missing -a / --authorization argument!"
_help = True
def parse_opts():
"""Get command-line arguments from the script"""
try:
- _short_opts = 'i:o:p:ALWn:s:h'
- _long_opts = ['input=', 'output=', 'publisher=', 'algoma', \
- 'laurentian', 'windsor', 'note=', 'sample=', 'help']
+ _short_opts = 'i:o:a:c:p:ALWn:s:h'
+ _long_opts = ['input=', 'output=', 'authorization=', 'consortium=',
+ 'publisher=', 'algoma', 'laurentian', 'windsor', 'note=',
+ 'sample=', 'help'
+ ]
opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
except getopt.GetoptError, ex:
print "* %s" % str(ex)
add_publisher(record, new_record, options)
add_restriction(new_record, options)
+ marked_isbn = mark_isbn_for_sfx(record, options)
+ if not marked_isbn:
+ print("No matching ISBN target found in SFX for %s" %
+ (new_record['856']['u'])
+ )
if 'note' in options:
note = pymarc.Field(tag = '590',
)
new_record.add_field(seven_ten)
def mark_isbn_for_sfx(record, options):
    """
    Adds a $9 subfield to the 020 (ISBN) field to use for SFX look-ups

    Assumes that the holdings in the SFX knowledgebase have been enabled
    before the ebook processing script runs, or else we will not find any
    matches.

    Each 020 $a value is normalized with clean_isbn() and looked up in SFX.
    The first ISBN for which SFX reports a 'getFullTxt' target gets a
    $9 'SFX' subfield added to its 020 field.

    Returns True as soon as one ISBN has been marked, False if no ISBN had
    a full-text target, and None if an SFX request failed (the remaining
    ISBNs are not checked in that case).
    """

    for isbn in record.get_fields('020'):
        for isbnval in isbn.get_subfields('a'):
            isbnval = clean_isbn(isbnval)
            if not isbnval:
                # Nothing usable in this subfield; do not query SFX for
                # the literal string 'None'
                continue

            # check to see if there are holdings in SFX knowledgebase
            # use one of the participating libraries, falling back to the
            # Windsor instance when none of them was selected
            sfx = 'http://sfx.scholarsportal.info/windsor'
            for library in ('windsor', 'laurentian', 'algoma'):
                if library in options['libraries']:
                    settings = options['settings'].get_settings(library)
                    sfx = settings['sfx_url']
                    break

            url = "%s?url_ver=Z39.88-2004&url_ctx_fmt=infofi/fmt:kev:mtx:ctx&" \
                "ctx_enc=UTF-8&ctx_ver=Z39.88-2004&rfr_id=info:sid/evergreen&" \
                "sfx.ignore_date_threshold=1&" \
                "sfx.response_type=multi_obj_detailed_xml" \
                "&__service_type=getFullTxt&rft.isbn=%s" % (sfx, isbnval)

            try:
                req = urllib2.urlopen(url)
                try:
                    sfx_res = BeautifulSoup(req.read())
                finally:
                    # Release the connection even if reading/parsing fails
                    req.close()
            except urllib2.URLError as ex:
                # HTTPError is a subclass of URLError, so this one handler
                # covers both kinds of failure
                print("%s for URL %s" % (ex, url))
                return None

            # We want a target with a service_type element of 'getFullTxt'.
            # NOTE(review): assumes BeautifulSoup's attribute-style lookup
            # yields None for a missing element - confirm for the version
            # this script imports
            ctx_obj = sfx_res.ctx_obj
            if ctx_obj is None or ctx_obj.ctx_obj_targets is None:
                # Response lacks the expected target list - next!
                continue

            targets = ctx_obj.ctx_obj_targets.findAll(
                'target', recursive=False
            )

            # An empty target list simply falls through to the next ISBN
            for target in targets:
                service_type = target.service_type
                if service_type is None:
                    continue
                if service_type.renderContents() == 'getFullTxt':
                    # Add the $9 subfield to mark this as a good one
                    isbn.add_subfield('9', 'SFX')
                    return True
    return False
+
def clean_isbn(isbn):
    """
    Return a normalized ISBN from a MARC subfield

    Trims surrounding whitespace, skips any leading non-digit text, takes
    the first run of non-whitespace characters that starts with a digit,
    and removes hyphens from it. Text separated from the ISBN by
    whitespace, such as ' (pbk)', is dropped.

    Returns None when no ISBN-like token can be found (for example an
    empty subfield or one that contains no digits).
    """

    if not isbn:
        return None

    # Grab the first string beginning with a digit; the pattern needs at
    # least two characters in the token, so a lone digit does not match
    isbn_match = re.search(r'^[\D]*([\d]+\S+).*?$', isbn.strip())

    # re.search() returns None when nothing matches, so test the match
    # object itself before asking it for a group
    if isbn_match is None:
        return None

    # Replace hyphens
    return isbn_match.group(1).replace('-', '')
+
def add_restriction(new_record, options):
"""
Adds a 506 access restriction note per institution
if __name__ == '__main__':
-
+
process_records(parse_opts())