From e9c99e62f39dfc51bf7c2a5b855afa909cdd2f3b Mon Sep 17 00:00:00 2001 From: Dan Scott Date: Thu, 4 Oct 2012 18:17:12 -0400 Subject: [PATCH] Ebooks: Only output one record, no matter how many dupes If you specify dupe-checking by TCN, URL, and ISBN, you only want to generate one output record per input record, no matter how many types of matches you get - so short-circuit the logic. Also, handle the case where you're not specifying all dupe checks :) Signed-off-by: Dan Scott --- tools/ebooks/prep_ebook_records.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/ebooks/prep_ebook_records.py b/tools/ebooks/prep_ebook_records.py index f5657eb3e7..65fd373307 100644 --- a/tools/ebooks/prep_ebook_records.py +++ b/tools/ebooks/prep_ebook_records.py @@ -429,33 +429,39 @@ def process_records(options): dupe_flags = {} - if files['duplicate']: + if 'duplicate' in files: tmp_record = process_fields(copy.deepcopy(record), options) bib_id, dupe_flags['isbn'] = isbn_check(tmp_record) if dupe_flags['isbn']: tmp_record = add_dupe_field(tmp_record, bib_id) files['duplicate'].write(tmp_record) + else: + del(dupe_flags['isbn']) - if files['tcn']: + if 'tcn' in files and len(dupe_flags) == 0: tmp_record = process_fields(copy.deepcopy(record), options) bib_id, dupe_flags['tcn'] = tcn_check(tmp_record) if dupe_flags['tcn']: tmp_record = add_dupe_field(tmp_record, bib_id) files['tcn'].write(tmp_record) + else: + del(dupe_flags['tcn']) - if files['url']: + if 'url' in files and len(dupe_flags) == 0: tmp_record = process_fields(copy.deepcopy(record), options) bib_id, dupe_flags['url'] = url_check(tmp_record, options) if dupe_flags['url']: tmp_record = add_dupe_field(tmp_record, bib_id) files['url'].write(tmp_record) + else: + del(dupe_flags['url']) if len(dupe_flags): DUP_COUNT += 1 else: new_record = process_fields(record, options) writer.write(new_record) - if (files['sample'] and ( + if ('sample' in files and ( (RECORD_COUNT == 1) or (RECORD_COUNT % 100 == 0) )): files['sample'].write(new_record) -- 2.11.0