-W / --windsor : Add an 856 for University of Windsor
Optional arguments:
+ -C / --clean : Try to clean up diacritics - some of the records we get
+ have corrupted diacritics.
+
-d / --duplicate : The name of the file to route ISBN duplicates to.
-t / --tcn : The name of the file to route TCN duplicates to.
'-o': '--output',
'-a': '--authorization',
'-c': '--consortium',
+ '-C': '--clean',
'-d': '--duplicate',
'-e': '--ebrary',
'-p': '--publisher',
clean_opts['consortium'] = options['--consortium'].decode('utf-8')
clean_opts['authorization'] = options['--authorization'].decode('utf-8')
+ if '--clean' in options:
+ clean_opts['clean'] = True
if '--duplicate' in options:
clean_opts['duplicate'] = options['--duplicate']
def parse_opts():
"""Get command-line arguments from the script"""
try:
- _short_opts = 'i:o:a:c:p:P:ABLWe:d:t:u:n:s:h'
+ _short_opts = 'i:o:a:c:p:P:ABCLWe:d:t:u:n:s:h'
_long_opts = ['input=', 'output=', 'authorization=', 'consortium=',
'publisher=', 'platform=', 'algoma', 'boreal', 'laurentian',
- 'windsor', 'ebrary', 'duplicate=', 'tcn=', 'url=', 'note=',
+ 'windsor', 'ebrary', 'clean', 'duplicate=', 'tcn=', 'url=', 'note=',
'sample=', 'help'
]
opts = getopt.getopt(sys.argv[1:], _short_opts, _long_opts)
marked_isbn = mark_isbn_for_sfx(record, options)
for field in record.get_fields():
- field = clean_diacritics(field)
+ if 'clean' in options:
+ field = clean_diacritics(field)
# Process all of the 856 fields
if field.tag == '856':
new_fields = process_urls(field, options, publisher)