import codecs
import copy
import requests
-from datetime import date
+import datetime
from bs4 import BeautifulSoup
import traceback
-d / --duplicate : The name of the file to route ISBN duplicates to.
+ -D / --date : The ISO-formatted date (YYYY-MM-DD) on or before which records
+ should not be processed (specific to ScholarsPortal-sourced records)
+
-F / --from-format : The format ('xml' or 'marc21') of the input file
-T / --to-format : The format ('xml' or 'marc21') of the output file
"-c": "--consortium",
"-C": "--clean",
"-d": "--duplicate",
+ "-D": "--date",
"-e": "--ebrary",
"-F": "--from-format",
"-I": "--isbn-sfx",
_string_opts = {
"--authorization": "authorization",
"--consortium": "consortium",
+ "--date": "date",
"--duplicate": "duplicate",
"--from-format": "from-format",
"--note": "note",
clean_opts["input"] = _input
clean_opts["output"] = _output
clean_opts["settings"] = Institution()
+ # --date is optional. Parse it only when a value was supplied and store
+ # None otherwise, so downstream truthiness checks on the "date" option
+ # work instead of fromisoformat() raising on a missing or empty value.
+ _raw_date = clean_opts.get("date")
+ clean_opts["date"] = (
+     datetime.date.fromisoformat(_raw_date) if _raw_date else None
+ )
return clean_opts
def parse_opts():
"""Get command-line arguments from the script"""
try:
- _short_opts = "i:o:a:p:P:ABLc:eCId:F:T:t:u:n:s:x:h"
+ # getopt-style spec: a trailing ":" marks an option that takes a value.
+ # Both "-d" (duplicate output file) and "-D" (cutoff date) take one, so
+ # each needs its own colon; "dD:" would turn "-d" into a bare flag.
+ _short_opts = "i:o:a:p:P:ABLc:eCId:D:F:T:t:u:n:s:x:h"
_long_opts = [
"input=",
"output=",
"clean",
"isbn-sfx",
"duplicate=",
+ "date=",
"from-format=",
"to-format=",
"tcn=",
else:
del dupe_flags["url"]
- if dupe_flags:
+ date_cutoff = False
+
+ # Optional cutoff filter: only active when a "date" option was supplied.
+ cutoff = options["date"]
+ if cutoff:
+     # The record's date is the first YYYY-MM-DD found in 856$u.
+     try:
+         u = record["856"]["u"]
+     except (KeyError, TypeError):
+         # No 856 field or no $u subfield: nothing to compare against.
+         u = None
+
+     date_match = re.search(r"([\d]{4})-([\d]{2})-([\d]{2})", u) if u else None
+
+     if date_match:
+         try:
+             d = datetime.date(
+                 int(date_match.group(1)),
+                 int(date_match.group(2)),
+                 int(date_match.group(3)),
+             )
+         except ValueError:
+             # Digits matched the pattern but are not a real calendar date.
+             d = None
+
+         # Records dated on or before the cutoff are skipped; records with
+         # no usable date in the URL are left for normal processing.
+         if d is not None and d <= cutoff:
+             date_cutoff = True
+
+ if dupe_flags or date_cutoff:
DUP_COUNT += 1
else:
new_record = process_fields(record, options)
marc_source = pymarc.Field(
tag="598",
indicators=[" ", " "],
- subfields=["a", source, "b", date.today().isoformat(), "c", str(RECORD_COUNT)],
+ subfields=[
+ "a",
+ source,
+ "b",
+ datetime.date.today().isoformat(),
+ "c",
+ str(RECORD_COUNT),
+ ],
)
record.add_ordered_field(marc_source)