From 21a31da14cb875a3ce986bc3ccb4e359c5cec6fe Mon Sep 17 00:00:00 2001 From: dbs Date: Fri, 11 Feb 2011 21:40:49 +0000 Subject: [PATCH] Normalize ISBNs by stripping hyphens in search methods and in reporter.materialized_simple_record We weren't normalizing ISBNs in search or in reporter.materialized_simple_record which required users to exactly match the form of the ISBN as entered in the MARC record to get a successful search result - pretty hit or miss. The longer term fix in 2.0 and above is to make the ISBN search search against the identifier|isbn index, which also gets ISBN10/ISBN13 equivalence goodness. But this patch will be relatively easy to backport through to 1.6.1 and plugs some of the holes in our exposed APIs and search points in the short term. git-svn-id: svn://svn.open-ils.org/ILS/branches/rel_2_1@19433 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- .../lib/OpenILS/Application/Search/Biblio.pm | 4 ++ Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +- Open-ILS/src/sql/Pg/reporter-schema.sql | 8 +-- .../upgrade/0485.schema.reporter_strip_isbns.sql | 74 ++++++++++++++++++++++ 4 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm index 96ca5d2f3e..4406125972 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm @@ -2095,6 +2095,8 @@ __PACKAGE__->register_method( sub biblio_search_isbn { my( $self, $client, $isbn ) = @_; $logger->debug("Searching ISBN $isbn"); + # Strip hyphens from incoming ISBNs + $isbn =~ s/-//g; my $recs = $U->storagereq('open-ils.storage.id_list.biblio.record_entry.search.isbn.atomic', $isbn); return { ids => $recs, count => scalar(@$recs) }; } @@ -2109,6 +2111,8 @@ sub biblio_search_isbn_batch { $logger->debug("Searching ISBNs @$isbn_list"); my @recs = (); my %rec_set = (); foreach my $isbn ( @$isbn_list ) { + # Strip hyphens from incoming ISBNs + $isbn =~ s/-//g; foreach my $rec ( @{ $U->storagereq( 'open-ils.storage.id_list.biblio.record_entry.search.isbn.atomic', $isbn ) } ) { diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index bf3107d0bb..c8496a9c18 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -70,7 +70,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0484'); -- miker +INSERT INTO config.upgrade_log (version) VALUES ('0485'); -- dbs CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/reporter-schema.sql b/Open-ILS/src/sql/Pg/reporter-schema.sql index d8f614ebe7..85b5c54457 100644 --- a/Open-ILS/src/sql/Pg/reporter-schema.sql +++ b/Open-ILS/src/sql/Pg/reporter-schema.sql @@ -126,8 +126,8 @@ SELECT r.id, series_title.value AS series_title, series_statement.value AS series_statement, summary.value AS summary, - ARRAY_ACCUM( SUBSTRING(isbn.value FROM $$^\S+$$) ) AS isbn, - ARRAY_ACCUM( REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn, + ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn, + ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn, ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '650' AND subfield = 'a' AND record = r.id)) AS topic_subject, ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '651' AND subfield = 'a' AND record = r.id)) AS geographic_subject, ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '655' AND subfield = 'a' AND record = r.id)) AS genre, @@ -158,8 +158,8 @@ SELECT r.id, FIRST(author.value) AS author, ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT publisher.value), ', ') AS publisher, ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$) ), ', ') AS pubdate, - ARRAY_ACCUM( DISTINCT SUBSTRING(isbn.value FROM $$^\S+$$) ) AS isbn, - ARRAY_ACCUM( DISTINCT SUBSTRING(issn.value FROM $$^\S+$$) ) AS issn + ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn, + ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn FROM biblio.record_entry r LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') diff --git a/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql b/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql new file mode 100644 index 0000000000..b1ac0e3250 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0485.schema.reporter_strip_isbns.sql @@ -0,0 +1,74 @@ +BEGIN; + +INSERT INTO config.upgrade_log (version) VALUES ('0485'); -- dbs + +CREATE OR REPLACE VIEW reporter.simple_record AS +SELECT r.id, + s.metarecord, + r.fingerprint, + r.quality, + r.tcn_source, + r.tcn_value, + title.value AS title, + uniform_title.value AS uniform_title, + author.value AS author, + publisher.value AS publisher, + SUBSTRING(pubdate.value FROM $$\d+$$) AS pubdate, + series_title.value AS series_title, + series_statement.value AS series_statement, + summary.value AS summary, + ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn, + ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '650' AND subfield = 'a' AND record = r.id)) AS topic_subject, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '651' AND subfield = 'a' AND record = r.id)) AS geographic_subject, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '655' AND subfield = 'a' AND record = r.id)) AS genre, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '600' AND subfield = 'a' AND record = r.id)) AS name_subject, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '610' AND subfield = 'a' AND record = r.id)) AS corporate_subject, + ARRAY((SELECT value FROM metabib.full_rec WHERE tag = '856' AND subfield IN ('3','y','u') AND record = r.id ORDER BY CASE WHEN subfield IN ('3','y') THEN 0 ELSE 1 END)) AS external_uri + FROM biblio.record_entry r + JOIN metabib.metarecord_source_map s ON (s.source = r.id) + LEFT JOIN metabib.full_rec uniform_title ON (r.id = uniform_title.record AND uniform_title.tag = '240' AND uniform_title.subfield = 'a') + LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') + LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag = '100' AND author.subfield = 'a') + LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND publisher.tag = '260' AND publisher.subfield = 'b') + LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND pubdate.tag = '260' AND pubdate.subfield = 'c') + LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) + LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') + LEFT JOIN metabib.full_rec series_title ON (r.id = series_title.record AND series_title.tag IN ('830','440') AND series_title.subfield = 'a') + LEFT JOIN metabib.full_rec series_statement ON (r.id = series_statement.record AND series_statement.tag = '490' AND series_statement.subfield = 'a') + LEFT JOIN metabib.full_rec summary ON (r.id = summary.record AND summary.tag = '520' AND summary.subfield = 'a') + GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14; + +CREATE OR REPLACE VIEW reporter.old_super_simple_record AS +SELECT r.id, + r.fingerprint, + r.quality, + r.tcn_source, + r.tcn_value, + FIRST(title.value) AS title, + FIRST(author.value) AS author, + ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT publisher.value), ', ') AS publisher, + ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$) ), ', ') AS pubdate, + ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn, + ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn + FROM biblio.record_entry r + LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') + LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') + LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND publisher.tag = '260' AND publisher.subfield = 'b') + LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND pubdate.tag = '260' AND pubdate.subfield = 'c') + LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) + LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') + GROUP BY 1,2,3,4,5; + +-- Update reporter.materialized_simple_record with normalized ISBN values +-- This might not get all of them, but most ISBNs will have more than one hyphen +DELETE FROM reporter.materialized_simple_record WHERE id IN ( + SELECT record FROM metabib.full_rec WHERE tag = '020' AND subfield IN ('a', 'z') AND value LIKE '%-%-%' +); + +INSERT INTO reporter.materialized_simple_record + SELECT DISTINCT rossr.* FROM reporter.old_super_simple_record rossr INNER JOIN metabib.full_rec mfr ON mfr.record = rossr.id + WHERE mfr.tag = '020' AND mfr.subfield IN ('a', 'z') AND mfr.value LIKE '%-%-%' +; + +COMMIT; -- 2.11.0