From: Dan Scott
Date: Tue, 7 Apr 2020 17:03:12 +0000 (-0400)
Subject: Add a date cutoff option for Scholars Portal books
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=c0e409d9e48a77e44318eb65707f12b65c1b0365;p=contrib%2FConifer.git

Add a date cutoff option for Scholars Portal books

Add an optional -D / --date argument taking an ISO formatted date.
When it is given, a record whose 856 $u URL contains a YYYY-MM-DD date
on or before the cutoff is not processed and is counted together with
the duplicates. Records whose URL carries no date are left alone.

The -d / --duplicate short option keeps its required argument.

Signed-off-by: Dan Scott
---

diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py
index 6a823cba11..a31df9eae6 100755
--- a/tools/ebooks/prep_ebook_records.py
+++ b/tools/ebooks/prep_ebook_records.py
@@ -24,7 +24,7 @@ import json
 import codecs
 import copy
 import requests
-from datetime import date
+import datetime
 from bs4 import BeautifulSoup
 import traceback
 
@@ -139,6 +139,9 @@ Optional arguments:
     -d / --duplicate : The name of the file to route ISBN duplicates to.
 
+    -D / --date : The ISO formatted date on or before which records should
+        not be processed (specific to ScholarsPortal-sourced records)
+
     -F / --from-format : The format ('xml' or 'marc21') of the input file
 
     -T / --to-format : The format ('xml' or 'marc21') of the output file
 
@@ -178,6 +181,7 @@ def consolidate_options(opts):
         "-c": "--consortium",
         "-C": "--clean",
         "-d": "--duplicate",
+        "-D": "--date",
         "-e": "--ebrary",
         "-F": "--from-format",
         "-I": "--isbn-sfx",
@@ -255,6 +259,7 @@ def check_options(options):
     _string_opts = {
         "--authorization": "authorization",
         "--consortium": "consortium",
+        "--date": "date",
         "--duplicate": "duplicate",
         "--from-format": "from-format",
         "--note": "note",
@@ -281,6 +286,9 @@ def check_options(options):
     clean_opts["input"] = _input
     clean_opts["output"] = _output
     clean_opts["settings"] = Institution()
+    # --date is optional; only parse it when it was actually supplied
+    _cutoff = clean_opts.get("date")
+    clean_opts["date"] = datetime.date.fromisoformat(_cutoff) if _cutoff else None
 
     return clean_opts
 
@@ -411,7 +419,7 @@ def check_libraries(options):
 def parse_opts():
     """Get command-line arguments from the script"""
     try:
-        _short_opts = "i:o:a:p:P:ABLc:eCId:F:T:t:u:n:s:x:h"
+        _short_opts = "i:o:a:p:P:ABLc:eCId:D:F:T:t:u:n:s:x:h"
         _long_opts = [
             "input=",
             "output=",
@@ -426,6 +434,7 @@ def parse_opts():
             "clean",
             "isbn-sfx",
             "duplicate=",
+            "date=",
             "from-format=",
             "to-format=",
             "tcn=",
@@ -505,7 +514,23 @@ def process_record(record, options, files):
         else:
             del dupe_flags["url"]
 
-        if dupe_flags:
+        date_cutoff = False
+
+        if options["date"]:
+            u = record["856"]["u"]
+            date_match = re.search(r"([\d]{4})-([\d]{2})-([\d]{2})", u)
+
+            # Only apply the cutoff when the URL actually carries a date;
+            # a URL without one keeps the record instead of crashing
+            if date_match:
+                d = datetime.date(
+                    int(date_match.group(1)),
+                    int(date_match.group(2)),
+                    int(date_match.group(3)),
+                )
+                date_cutoff = d <= options["date"]
+
+        if dupe_flags or date_cutoff:
             DUP_COUNT += 1
         else:
             new_record = process_fields(record, options)
@@ -1113,7 +1138,14 @@ def add_marc_source(record, options):
     marc_source = pymarc.Field(
         tag="598",
         indicators=[" ", " "],
-        subfields=["a", source, "b", date.today().isoformat(), "c", str(RECORD_COUNT)],
+        subfields=[
+            "a",
+            source,
+            "b",
+            datetime.date.today().isoformat(),
+            "c",
+            str(RECORD_COUNT),
+        ],
     )
 
     record.add_ordered_field(marc_source)