From 37cfe0916caee78838bbe276a494b184e24ff96a Mon Sep 17 00:00:00 2001 From: dbs Date: Sat, 7 Aug 2010 03:53:01 +0000 Subject: [PATCH] Basic call-number sorting by classification scheme Until now, call numbers like 'K 22 .U748 v.18' and 'K 215 .E53 W37 1997' would sort incorrectly in Evergreen, making call number browsing painful. The following patch adds the following to Evergreen to support better call number sorting: * asset.call_number gains two new columns: - label_class - identifies the classification scheme for the given callnumber - label_sortkey - holds the normalized callnumber suitable for sorting * a new table, asset.call_number_class, which holds the names of each classification scheme and the name of the assorted normalization function * implementations of three normalization functions: * a generic normalizer * a Dewey Decimal Classification normalizer (courtesy the Koha project) * an LC classification normalizer (courtesy Bill Dueber's Library::CallNumber::LC) * a routine in Makefile.install to checkout and install Library::CallNumber::LC * call number sorting routines were modified to use label_sortkey instead of label, where I could find them * corresponding changes to fm_IDL.xml for the new columns + table A suggested means for applying this to an existing library would be to issue update statements against asset.call_number setting label_class to the value in asset.call_number_class that corresponds to owning_lib's scheme, for example: UPDATE asset.call_number SET label_class = 3 WHERE owning_lib IN (4,5); We need to teach the staff client holdings maintenance interfaces how to set the classification scheme for a given call number, and we should also create an org_unit setting that can set the preferred default classification scheme that the staff client should inherit. Or maybe we could just make that a sticky value and have a BEFORE trigger check the incoming value of label_class and only set it to the org_unit default value if it is NULL (to support bulk loads). git-svn-id: svn://svn.open-ils.org/ILS/trunk@17130 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/examples/fm_IDL.xml | 9 ++ Open-ILS/src/extras/Makefile.install | 7 +- .../src/perlmods/OpenILS/Application/SuperCat.pm | 10 +- Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +- Open-ILS/src/sql/Pg/040.schema.asset.sql | 107 +++++++++++++++++++- .../0364.schema.call_number_normalization.sql | 108 +++++++++++++++++++++ 6 files changed, 234 insertions(+), 9 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml index 8ca6f9c63..bae749d53 100644 --- a/Open-ILS/examples/fm_IDL.xml +++ b/Open-ILS/examples/fm_IDL.xml @@ -1623,6 +1623,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + + + + + @@ -1638,6 +1644,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + + @@ -1648,6 +1656,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + diff --git a/Open-ILS/src/extras/Makefile.install b/Open-ILS/src/extras/Makefile.install index 195e45c09..a358ac944 100644 --- a/Open-ILS/src/extras/Makefile.install +++ b/Open-ILS/src/extras/Makefile.install @@ -266,12 +266,12 @@ all: @echo "please specify an OS" && exit 0 # these should be the same for any distro -install: install_cpan install_js_sm install_libdbi +install: install_cpan install_js_sm install_libdbi install_library_callnumber_lc centos: install_centos_pgsql install_centos_rpms install_yaz install_cpan_marc install install_centos_perl create_ld_local install_cpan_safe rhel: install_redhat_pgsql install_centos_rpms install_yaz install_cpan_marc install install_centos_perl create_ld_local install_cpan_safe -fedora-13: install_fedora_13_rpms install_cpan install_cpan_marc install_cpan_fedora install_spidermonkey +fedora-13: install_fedora_13_rpms install_cpan install_cpan_marc install_cpan_fedora install_spidermonkey install_library_callnumber_lc debian-etch: etch generic_debian debian-lenny: lenny generic_debian @@ -343,6 +343,9 @@ install_libdbi: cd $(LIBDBI_DRIVERS) && ./configure \ --disable-docs --with-pgsql --enable-libdbi && make all install +install_library_callnumber_lc: + if [ ! -d Library-CallNumber-LC ]; then svn checkout http://library-callnumber-lc.googlecode.com/svn/trunk/perl/Library-CallNumber-LC; fi; + cd Library-CallNumber-LC && perl Makefile.PL && make install clean: make -C $(LIBDBI) clean diff --git a/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm b/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm index d02a5cf84..57e8a1d7a 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/SuperCat.pm @@ -305,7 +305,7 @@ sub cn_browse { }, { flesh => 1, flesh_fields => { acn => [qw/record owning_lib/] }, - order_by => { acn => "upper(label) desc, id desc, owning_lib desc" }, + order_by => { acn => "label_sortkey, upper(label) desc, id desc, owning_lib desc" }, limit => $before_limit, offset => abs($page) * $page_size - $before_offset, } @@ -323,7 +323,7 @@ sub cn_browse { }, { flesh => 1, flesh_fields => { acn => [qw/record owning_lib/] }, - order_by => { acn => "upper(label), id, owning_lib" }, + order_by => { acn => "label_sortkey, upper(label), id, owning_lib" }, limit => $after_limit, offset => abs($page) * $page_size - $after_offset, } @@ -428,7 +428,7 @@ sub cn_startwith { }, { flesh => 1, flesh_fields => { acn => [qw/record owning_lib/] }, - order_by => { acn => "upper(label) desc, id desc, owning_lib desc" }, + order_by => { acn => "label_sortkey, upper(label) desc, id desc, owning_lib desc" }, limit => $limit, offset => $offset, } @@ -446,7 +446,7 @@ sub cn_startwith { }, { flesh => 1, flesh_fields => { acn => [qw/record owning_lib/] }, - order_by => { acn => "upper(label), id, owning_lib" }, + order_by => { acn => "label_sortkey, upper(label), id, owning_lib" }, limit => $limit, offset => $offset, } @@ -1760,7 +1760,7 @@ sub new_record_holdings { }, ( $limit > -1 ? ( limit => $limit ) : () ), ( $offset ? ( offset => $offset ) : () ), - order_by => { acn => { label => {} } } + order_by => { acn => { label_sortkey => {} } } } )->gather(1); diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index cee59a291..3b12cbf6a 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -68,7 +68,7 @@ CREATE TABLE config.upgrade_log ( install_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); -INSERT INTO config.upgrade_log (version) VALUES ('0363'); -- miker +INSERT INTO config.upgrade_log (version) VALUES ('0364'); -- dbs CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/040.schema.asset.sql b/Open-ILS/src/sql/Pg/040.schema.asset.sql index 23be45af2..0d91bdd80 100644 --- a/Open-ILS/src/sql/Pg/040.schema.asset.sql +++ b/Open-ILS/src/sql/Pg/040.schema.asset.sql @@ -167,6 +167,103 @@ CREATE TABLE asset.uri ( active BOOL NOT NULL DEFAULT TRUE ); +CREATE TABLE asset.call_number_class ( + id bigserial PRIMARY KEY, + name TEXT NOT NULL, + normalizer TEXT NOT NULL DEFAULT 'asset.normalize_generic' +); + +CREATE OR REPLACE FUNCTION asset.label_normalizer() RETURNS TRIGGER AS $func$ +DECLARE + sortkey TEXT := ''; +BEGIN + sortkey := NEW.label_sortkey; + + EXECUTE 'SELECT ' || acnc.normalizer || '(' || + quote_literal( NEW.label ) || ')' + FROM asset.call_number_class acnc + WHERE acnc.id = NEW.label_class + INTO sortkey; + + NEW.label_sortkey = sortkey; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION asset.label_normalizer_generic(TEXT) RETURNS TEXT AS $func$ + # Created after looking at the Koha C4::ClassSortRoutine::Generic module, + # thus could probably be considered a derived work, although nothing was + # directly copied - but to err on the safe side of providing attribution: + # Copyright (C) 2007 LibLime + # Licensed under the GPL v2 or later + + use strict; + use warnings; + + # Converts the callnumber to uppercase + # Strips spaces from start and end of the call number + # Converts anything other than letters, digits, and periods into underscores + # Collapses multiple underscores into a single underscore + my $callnum = uc(shift); + $callnum =~ s/^\s//g; + $callnum =~ s/\s$//g; + $callnum =~ s/[^A-Z0-9_.]/_/g; + $callnum =~ s/_{2,}/_/g; + + return $callnum; +$func$ LANGUAGE PLPERLU; + +CREATE OR REPLACE FUNCTION asset.label_normalizer_dewey(TEXT) RETURNS TEXT AS $func$ + # Derived from the Koha C4::ClassSortRoutine::Dewey module + # Copyright (C) 2007 LibLime + # Licensed under the GPL v2 or later + + use strict; + use warnings; + + my $init = uc(shift); + $init =~ s/^\s+//; + $init =~ s/\s+$//; + $init =~ s!/!!g; + $init =~ s/^([\p{IsAlpha}]+)/$1 /; + my @tokens = split /\.|\s+/, $init; + my $digit_group_count = 0; + for (my $i = 0; $i <= $#tokens; $i++) { + if ($tokens[$i] =~ /^\d+$/) { + $digit_group_count++; + if (2 == $digit_group_count) { + $tokens[$i] = sprintf("%-15.15s", $tokens[$i]); + $tokens[$i] =~ tr/ /0/; + } + } + } + my $key = join("_", @tokens); + $key =~ s/[^\p{IsAlnum}_]//g; + + return $key; + +$func$ LANGUAGE PLPERLU; + +CREATE OR REPLACE FUNCTION asset.label_normalizer_lc(TEXT) RETURNS TEXT AS $func$ + use strict; + use warnings; + + # Library::CallNumber::LC is currently hosted at http://code.google.com/p/library-callnumber-lc/ + # The author hopes to upload it to CPAN some day, which would make our lives easier + use Library::CallNumber::LC; + + my $callnum = Library::CallNumber::LC->new(shift); + return $callnum->normalize(); + +$func$ LANGUAGE PLPERLU; + +INSERT INTO asset.call_number_class (name, normalizer) VALUES + ('Generic', 'asset.label_normalizer_generic'), + ('Dewey (DDC)', 'asset.label_normalizer_dewey'), + ('Library of Congress (LC)', 'asset.label_normalizer_lc') +; + CREATE TABLE asset.call_number ( id bigserial PRIMARY KEY, creator BIGINT NOT NULL, @@ -176,15 +273,23 @@ CREATE TABLE asset.call_number ( record bigint NOT NULL, owning_lib INT NOT NULL, label TEXT NOT NULL, - deleted BOOL NOT NULL DEFAULT FALSE + deleted BOOL NOT NULL DEFAULT FALSE, + label_class BIGINT DEFAULT 1 NOT NULL + REFERENCES asset.call_number_class(id) + DEFERRABLE INITIALLY DEFERRED, + label_sortkey TEXT ); CREATE INDEX asset_call_number_record_idx ON asset.call_number (record); CREATE INDEX asset_call_number_creator_idx ON asset.call_number (creator); CREATE INDEX asset_call_number_editor_idx ON asset.call_number (editor); CREATE INDEX asset_call_number_dewey_idx ON asset.call_number (public.call_number_dewey(label)); CREATE INDEX asset_call_number_upper_label_id_owning_lib_idx ON asset.call_number (upper(label),id,owning_lib); +CREATE INDEX asset_call_number_label_sortkey ON asset.call_number(label_sortkey); CREATE UNIQUE INDEX asset_call_number_label_once_per_lib ON asset.call_number (record, owning_lib, label) WHERE deleted = FALSE OR deleted IS FALSE; CREATE RULE protect_cn_delete AS ON DELETE TO asset.call_number DO INSTEAD UPDATE asset.call_number SET deleted = TRUE WHERE OLD.id = asset.call_number.id; +CREATE TRIGGER asset_label_sortkey_trigger + BEFORE UPDATE OR INSERT ON asset.call_number + FOR EACH ROW EXECUTE PROCEDURE asset.label_normalizer(); CREATE TABLE asset.uri_call_number_map ( id BIGSERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql b/Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql new file mode 100644 index 000000000..5d3fe3391 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0364.schema.call_number_normalization.sql @@ -0,0 +1,108 @@ +BEGIN; + +INSERT INTO config.upgrade_log (version) VALUES ('0364'); -- dbs + +CREATE TABLE asset.call_number_class ( + id bigserial PRIMARY KEY, + name TEXT NOT NULL, + normalizer TEXT NOT NULL DEFAULT 'asset.normalize_generic' +); + +ALTER TABLE asset.call_number ADD COLUMN label_class BIGINT DEFAULT 1 NOT NULL REFERENCES asset.call_number_class(id) DEFERRABLE INITIALLY DEFERRED; +ALTER TABLE asset.call_number ADD COLUMN label_sortkey TEXT; +CREATE INDEX asset_call_number_label_sortkey ON asset.call_number(label_sortkey); +ALTER TABLE auditor.asset_call_number_history ADD COLUMN label_class BIGINT NOT NULL; +ALTER TABLE auditor.asset_call_number_history ADD COLUMN label_sortkey TEXT; + +CREATE OR REPLACE FUNCTION asset.label_normalizer() RETURNS TRIGGER AS $func$ +DECLARE + sortkey TEXT := ''; +BEGIN + sortkey := NEW.label_sortkey; + + EXECUTE 'SELECT ' || acnc.normalizer || '(' || + quote_literal( NEW.label ) || ')' + FROM asset.call_number_class acnc + WHERE acnc.id = NEW.label_class + INTO sortkey; + + NEW.label_sortkey = sortkey; + + RETURN NEW; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION asset.label_normalizer_generic(TEXT) RETURNS TEXT AS $func$ + # Created after looking at the Koha C4::ClassSortRoutine::Generic module, + # thus could probably be considered a derived work, although nothing was + # directly copied - but to err on the safe side of providing attribution: + # Copyright (C) 2007 LibLime + # Licensed under the GPL v2 or later + + use strict; + use warnings; + + # Converts the callnumber to uppercase + # Strips spaces from start and end of the call number + # Converts anything other than letters, digits, and periods into underscores + # Collapses multiple underscores into a single underscore + my $callnum = uc(shift); + $callnum =~ s/^\s//g; + $callnum =~ s/\s$//g; + $callnum =~ s/[^A-Z0-9_.]/_/g; + $callnum =~ s/_{2,}/_/g; + + return $callnum; +$func$ LANGUAGE PLPERLU; + +CREATE OR REPLACE FUNCTION asset.label_normalizer_dewey(TEXT) RETURNS TEXT AS $func$ + # Derived from the Koha C4::ClassSortRoutine::Dewey module + # Copyright (C) 2007 LibLime + # Licensed under the GPL v2 or later + + use strict; + use warnings; + + my $init = uc(shift); + $init =~ s/^\s+//; + $init =~ s/\s+$//; + $init =~ s!/!!g; + $init =~ s/^([\p{IsAlpha}]+)/$1 /; + my @tokens = split /\.|\s+/, $init; + my $digit_group_count = 0; + for (my $i = 0; $i <= $#tokens; $i++) { + if ($tokens[$i] =~ /^\d+$/) { + $digit_group_count++; + if (2 == $digit_group_count) { + $tokens[$i] = sprintf("%-15.15s", $tokens[$i]); + $tokens[$i] =~ tr/ /0/; + } + } + } + my $key = join("_", @tokens); + $key =~ s/[^\p{IsAlnum}_]//g; + + return $key; + +$func$ LANGUAGE PLPERLU; + +CREATE OR REPLACE FUNCTION asset.label_normalizer_lc(TEXT) RETURNS TEXT AS $func$ + use strict; + use warnings; + + # Library::CallNumber::LC is currently hosted at http://code.google.com/p/library-callnumber-lc/ + # The author hopes to upload it to CPAN some day, which would make our lives easier + use Library::CallNumber::LC; + + my $callnum = Library::CallNumber::LC->new(shift); + return $callnum->normalize(); + +$func$ LANGUAGE PLPERLU; + +INSERT INTO asset.call_number_class (name, normalizer) VALUES + ('Generic', 'asset.label_normalizer_generic'), + ('Dewey (DDC)', 'asset.label_normalizer_dewey'), + ('Library of Congress (LC)', 'asset.label_normalizer_lc') +; + +COMMIT; -- 2.11.0