-#!/usr/bin/env python
+#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Prepare sets of electronic resource MARC records for loading into Evergreen
be accommodated in batch load.
"""
-import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, urllib2
+import os, os.path, sys, getopt, pymarc, pymarc.marc8, re, urllib2, json
+from urllib import quote
from datetime import date
from BeautifulSoup import BeautifulSoup
RECORD_COUNT = 0
+DUP_COUNT = 0  # number of records flagged as duplicates so far
+GATEWAY_URL = "http://www.concat.ca/osrf-gateway-v1"  # base URL queried by evergreen_request()
+OPENSRF_ISBN_CALL = "open-ils.search.biblio.isbn"  # method called by isbn_check()
+OPENSRF_TCN_CALL = "open-ils.search.biblio.tcn"  # method called by tcn_check()
+OPENSRF_KEYWORD_CALL = "open-ils.search.biblio.multiclass.query"  # method called by url_check()
class Institution():
"""Defines standard settings for each Conifer institution"""
"""Initialize the Institution object"""
self.algoma = { \
"code": "ALGOMASYS", \
+ "lac_symbol": "OSTMA", \
+ "org_unit": "111", \
"ebrary_code": "algomauca", \
"proxy": "http://libproxy.auc.ca/login?url=", \
"link_text": "Available online", \
self.boreal = { \
"code": "BOREALSYS", \
+ "lac_symbol": "BOREALSYS", \
+ "org_unit": "135", \
"ebrary_code": "ocls", \
"proxy": "http://ra.ocls.ca/ra/login.aspx?url=", \
"link_text": "Disponible en ligne", \
self.laurentian = { \
"code": "LUSYS", \
+ "lac_symbol": "OSUL", \
+ "org_unit": "105", \
"ebrary_code": "jndlu", \
"gale_code": "sudb78095", \
"proxy": "https://librweb.laurentian.ca/login?url=", \
self.windsor = { \
"code": "WINDSYS", \
+ "lac_symbol": "OWA", \
+ "org_unit": "106", \
"ebrary_code": "oculwindsor", \
"gale_code": "wind05901", \
"proxy": "http://ezproxy.uwindsor.ca/login?url=", \
-W / --windsor : Add an 856 for University of Windsor
Optional arguments:
+ -d / --duplicate : The name of the file to route ISBN duplicates to.
+
+ -t / --tcn : The name of the file to route TCN duplicates to.
+
+ -u / --url : The name of the file to route URL duplicates to.
+
-e / --ebrary : Put Ebrary 001 into 924 as a URN for SFX lookup purposes
-n / --note : The text of the internal note to be inserted into a 590 field.
'-o': '--output',
'-a': '--authorization',
'-c': '--consortium',
+ '-d': '--duplicate',
'-e': '--ebrary',
'-p': '--publisher',
'-P': '--platform',
'-n': '--note',
+ '-t': '--tcn',
+ '-u': '--url',
'-A': '--algoma',
'-B': '--boreal',
'-L': '--laurentian',
clean_opts['authorization'] = options['--authorization'].decode('utf-8')
+ if '--duplicate' in options:
+ clean_opts['duplicate'] = options['--duplicate']
+
+ if '--tcn' in options:
+ clean_opts['tcn'] = options['--tcn']
+
+ if '--url' in options:
+ clean_opts['url'] = options['--url']
+
if '--ebrary' in options:
clean_opts['ebrary'] = True
return clean_opts
+def evergreen_request(method, *args, **kwargs):
+    """
+    Call a method through the HTTP gateway at GATEWAY_URL and return its payload
+
+    The service name is derived from the first two dot-separated parts of
+    ``method``. Keyword arguments are sent as URL-quoted query-string
+    parameters alongside ``service`` and ``method``; each positional argument
+    is JSON-encoded and appended, in order, as a ``param`` value.
+
+    Returns the ``payload`` member of the decoded JSON response.
+
+    Raises Exception (carrying the decoded response) if the response's
+    ``status`` member is anything other than 200.
+    """
+    service = '.'.join(method.split('.')[:2])
+    kwargs.update({'service': service, 'method': method})
+    params = ['%s=%s' % (k, quote(v)) for k, v in kwargs.items()]
+    params += ['param=%s' % quote(json.dumps(a)) for a in args]
+    url = '%s?%s' % (GATEWAY_URL, '&'.join(params))
+    req = urllib2.urlopen(url)
+    try:
+        resp = json.load(req)
+    finally:
+        # This is called once per ISBN/TCN/URL lookup, so always release the
+        # connection - even when the body turns out not to be valid JSON
+        req.close()
+    if resp['status'] != 200:
+        raise Exception('error during evergreen request', resp)
+    return resp['payload']
+
+def url_check(record, options):
+    """
+    Search for existing records that already contain one of this record's URLs
+
+    Runs a keyword search for every subfield u of every 856 field, once for
+    each library in options['libraries'], using the org_unit from that
+    library's settings.
+
+    Returns a (match_id, match) tuple: match is True if any search returned
+    a hit, and match_id is the ID of the last hit reported (0 if none).
+    """
+    global DUP_COUNT, RECORD_COUNT
+
+    match = False
+    match_id = 0
+    for url in record.get_fields('856'):
+        for urlval in url.get_subfields('u'):
+            for library in options['libraries']:
+                libopts = options['settings'].get_settings(library)
+                search_args = {
+                    'org_unit': libopts['org_unit'],
+                    'depth': 1, 'limit': 5, 'offset': 0,
+                    'visibility_limit': 3000,
+                    'default_class': 'keyword'
+                }
+                keyword_info = evergreen_request(
+                    OPENSRF_KEYWORD_CALL, search_args, urlval, 1
+                )
+                for hit in keyword_info[0]['ids']:
+                    # Each hit is indexable and its first element is the ID
+                    # reported below, so hand back that ID - as tcn_check and
+                    # isbn_check do - rather than the whole hit
+                    match_id = hit[0]
+                    print("* %d of %d - URL match on %s for %s"
+                        % (DUP_COUNT + 1, RECORD_COUNT, urlval, match_id)
+                    )
+                    match = True
+    return match_id, match
+
+def tcn_check(record):
+    """
+    Search for existing records whose TCN equals one of this record's 001 values
+
+    Returns a (match_id, match) tuple: match is True if any lookup returned
+    an ID, and match_id is the last ID reported (0 if none).
+    """
+    found_id = 0
+    found = False
+    for control_field in record.get_fields('001'):
+        control_number = control_field.value()
+        response = evergreen_request(OPENSRF_TCN_CALL, control_number)
+        for candidate in response[0]['ids']:
+            print("* %d of %d - TCN match on %s for %s"
+                % (DUP_COUNT + 1, RECORD_COUNT, control_number, candidate)
+            )
+            # Later hits overwrite earlier ones; only the last ID is returned
+            found_id, found = candidate, True
+    return found_id, found
+
+def isbn_check(record):
+    """
+    Search for existing records that share one of this record's ISBNs
+
+    Checks subfields a and z of every 020 and 024 field. Each value is passed
+    through clean_isbn() and the cleaned value is what gets searched for and
+    reported.
+
+    Returns a (match_id, match) tuple: match is True if any lookup returned
+    an ID, and match_id is the last ID reported (0 if none).
+    """
+    global DUP_COUNT, RECORD_COUNT
+
+    match = False
+    match_id = 0
+    for isbn in record.get_fields('020', '024'):
+        for isbnval in isbn.get_subfields('a', 'z'):
+            isbn_val = clean_isbn(isbnval)
+            if not isbn_val:
+                # NOTE(review): assumes clean_isbn() returns an empty value
+                # when it cannot extract an ISBN - confirm against its
+                # definition. There is nothing meaningful to search for then.
+                continue
+            # Search on the cleaned value (the one reported below), not the
+            # raw subfield text
+            isbn_info = evergreen_request(OPENSRF_ISBN_CALL, isbn_val)
+            for bib_id in isbn_info[0]['ids']:
+                match_id = bib_id
+                print("* %d of %d - ISBN match on %s for %s"
+                    % (DUP_COUNT + 1, RECORD_COUNT, isbn_val, bib_id)
+                )
+                match = True
+    return match_id, match
+
def append_period(text):
"""
Append a period to the incoming text if required
def parse_opts():
"""Get command-line arguments from the script"""
try:
- _short_opts = 'i:o:a:c:p:ABLWen:P:s:h'
+ _short_opts = 'i:o:a:c:p:P:ABLWe:d:t:u:n:s:h'
_long_opts = ['input=', 'output=', 'authorization=', 'consortium=',
- 'publisher=', 'algoma', 'boreal', 'laurentian', 'windsor', 'ebrary',
- 'note=', 'platform=', 'sample=', 'help'
+ 'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian', 'windsor', 'ebrary',
+ 'duplicate=', 'tcn=', 'url=', 'note=','sample=', 'help'
]
opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
except getopt.GetoptError, ex:
"""Converts raw ebook MARC records to Conifer-ready MARC records"""
global RECORD_COUNT
+ global DUP_COUNT
sample = ''
+ duplicate = ''
+ tcn = ''
+ url = ''
try:
reader = pymarc.MARCReader(
writer = pymarc.MARCWriter(open(options['output'], mode='wb'))
except Exception, ex:
print("Could not open output file [%s]" % options['output'])
-
- if ('sample' in options):
+
+ if 'duplicate' in options:
+ try:
+ duplicate = pymarc.MARCWriter(open(options['duplicate'], mode='wb'))
+ except Exception, ex:
+ print("Could not open output file [%s]" % options['duplicate'])
+
+ if 'tcn' in options:
+ try:
+ tcn = pymarc.MARCWriter(open(options['tcn'], mode='wb'))
+ except Exception, ex:
+ print("Could not open output file [%s]" % options['tcn'])
+
+ if 'url' in options:
+ try:
+ url = pymarc.MARCWriter(open(options['url'], mode='wb'))
+ except Exception, ex:
+ print("Could not open output file [%s]" % options['url'])
+
+ if 'sample' in options:
sample = pymarc.MARCWriter(open(options['sample'], mode='wb'))
for record in reader:
print("* No 856 for record # %s in file %s"
% (RECORD_COUNT, options['input'])
)
-
- new_record = process_fields(record, options)
-
- writer.write(new_record)
- if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))):
- sample.write(new_record)
+ else:
+ print ("%d - %s\n" % (RECORD_COUNT,record['856']))
+
+
+ new_record = ''
+ dup_flag = False
+
+ if duplicate:
+ bib_id, dup_flag = isbn_check(record)
+ new_record = process_fields(record, options, bib_id, dup_flag)
+ if dup_flag:
+ duplicate.write(new_record)
+ if tcn:
+ bib_id, dup_flag = tcn_check(record)
+ new_record = process_fields(record, options, bib_id, dup_flag)
+ if dup_flag:
+ tcn.write(new_record)
+ if url:
+ bib_id, dup_flag = url_check(record, options)
+ new_record = process_fields(record, options, bib_id, dup_flag)
+ if dup_flag:
+ url.write(new_record)
+
+ if not dup_flag:
+ new_record = process_fields(record, options, 0, False)
+ else:
+ DUP_COUNT += 1
+
+ if new_record:
+ writer.write(new_record)
+ if (sample and ((RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0))):
+ sample.write(new_record)
except Exception, ex:
print("* Error processing record %d - %s" % (RECORD_COUNT, ex))
-def process_fields(record, options):
+def process_fields(record, options, bib_id, dup_flag):
"""Decide which fields to add, delete, and keep"""
new_record = pymarc.Record(to_unicode=True, force_utf8=True)
# 590
if 'note' in options:
+ note_value = options['note']
note = pymarc.Field(tag = '590',
indicators = [' ', ' '],
subfields = [
- 'a', options['note']
+ 'a', note_value
]
)
record.add_field(note)
+ # 909
+ if dup_flag:
+ dup_value = bib_id + ""
+ dup = pymarc.Field(tag = '909',
+ indicators = [' ', ' '],
+ subfields = [
+ 'a', dup_value
+ ]
+ )
+ record.add_field(dup)
+
add_marc_source(record, options) # 598
publisher = add_publisher(record, options) # 710
add_restriction(record, options, publisher) # 506
add_platform(record, options) # 710
+ # USE FOR SKIPPING SFX
+ # marked_isbn = False
marked_isbn = mark_isbn_for_sfx(record, options)
for field in record.get_fields():
return True
# For ebrary records, add a 924 for the custom URN
- if options['ebrary'] is True:
+ if 'ebrary' in options:
urn = None
for scn in record.get_fields('001'):
urn = pymarc.Field(tag = '924',
'a', append_space_semi_space(libopts['access_note']),
'b', append_space_semi_space(options['consortium']),
'e', authnote,
- '9', libopts['code']
+ '9', libopts['lac_symbol']
]
)
new_record.add_field(note)