<class id="are" controller="open-ils.cstore open-ils.pcrud" oils_obj:fieldmapper="authority::record_entry" oils_persist:tablename="authority.record_entry" reporter:label="Authority Record Entry">
<fields oils_persist:primary="id" oils_persist:sequence="authority.record_entry_id_seq">
<field name="active" reporter:datatype="bool"/>
- <field name="arn_source" />
- <field name="arn_value" />
<field name="create_date" reporter:datatype="timestamp"/>
<field name="creator" />
<field name="deleted" reporter:datatype="bool"/>
while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
next if ($rec == -1);
my $id = $count;
- my $_001 = $rec->field('001');
- my $arn = $count;
- $arn = $_001->data if ($_001);
(my $xml = $rec->as_xml_record()) =~ s/\n//sog;
$xml =~ s/^<\?xml.+\?\s*>//go;
$bib->create_date('now');
$bib->editor($user);
$bib->edit_date('now');
- $bib->arn_source('LEGACY');
- $bib->arn_value($arn);
$bib->last_xact_id('IMPORT-'.$starttime);
print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
$rec->edit_date('now');
$rec->marc($U->entityize($marc_doc->documentElement->toString));
- my ($arn, $evt) = find_arn($e, $marc_doc);
- return $evt if $evt;
- $rec->arn_value($arn);
-
$rec = $e->create_authority_record_entry($rec) or return $e->die_event;
# we don't care about the result, just fire off the request
return $rec;
}
-sub find_arn {
- my($e, $marc_doc) = @_;
-
- my $xpath = '//marc:controlfield[@tag="001"]';
- my ($arn) = $marc_doc->documentElement->findvalue($xpath);
-
- if(my $existing_rec = $e->search_authority_record_entry({arn_value => $arn, deleted => 'f'})->[0]) {
- # this arn is taken
- return (
- undef,
- OpenILS::Event->new(
- 'AUTHORITY_RECORD_NUMBER_EXISTS',
- payload => {existing_record => $existing_rec, arn => $arn}
- )
- );
- }
-
- return ($arn);
-}
-
1;
authority::record_entry->table( 'authority_record_entry' );
authority::record_entry->columns( Primary => qw/id/ );
-authority::record_entry->columns( Essential => qw/arn_source arn_value creator editor
+authority::record_entry->columns( Essential => qw/creator editor
create_date edit_date source active
deleted marc last_xact_id/ );
my $tcn_v = 'tcn_value';
my $tcn_s = 'tcn_source';
- if ($type eq 'authority') {
- $tcn_v = 'arn_value';
- $tcn_s = 'arn_source';
- }
-
my $holdings = $cgi->param('holdings') if ($type eq 'biblio');
my $location = $cgi->param('location') || 'gaaagpl'; # just because...
NEW.marc,
E'(</(?:[^:]*?:)?record>)',
E'<datafield tag="901" ind1=" " ind2=" ">' ||
- '<subfield code="a">' || NEW.arn_value || E'</subfield>' ||
- '<subfield code="b">' || NEW.arn_source || E'</subfield>' ||
'<subfield code="c">' || NEW.id || E'</subfield>' ||
'<subfield code="t">' || TG_TABLE_SCHEMA || E'</subfield>' ||
E'</datafield>\\1'
CREATE TABLE authority.record_entry (
id BIGSERIAL PRIMARY KEY,
- arn_source TEXT NOT NULL DEFAULT 'AUTOGEN',
- arn_value TEXT NOT NULL,
creator INT NOT NULL DEFAULT 1,
editor INT NOT NULL DEFAULT 1,
create_date TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
);
CREATE INDEX authority_record_entry_creator_idx ON authority.record_entry ( creator );
CREATE INDEX authority_record_entry_editor_idx ON authority.record_entry ( editor );
-CREATE UNIQUE INDEX authority_record_unique_tcn ON authority.record_entry (arn_source,arn_value) WHERE deleted = FALSE OR deleted IS FALSE;
CREATE TRIGGER a_marcxml_is_well_formed BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE biblio.check_marcxml_well_formed();
CREATE TRIGGER b_maintain_901 BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE maintain_901();
CREATE TRIGGER c_maintain_control_numbers BEFORE INSERT OR UPDATE ON authority.record_entry FOR EACH ROW EXECUTE PROCEDURE maintain_control_numbers();
*/
$$;
+-- Intended to be used in a unique index on authority.record_entry like so:
+-- CREATE UNIQUE INDEX unique_by_heading_and_thesaurus
+-- ON authority.record_entry (authority.normalize_heading(marc))
+-- WHERE deleted IS FALSE or deleted = FALSE;
+CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
+ use strict;
+ use warnings;
+
+ use utf8;
+ use MARC::Record;
+ use MARC::File::XML (BinaryEncoding => 'UTF8');
+ use UUID::Tiny ':std';
+
+ my $xml = shift() or return undef;
+
+ my $r;
+
+ # Prevent errors in XML parsing from blowing out ungracefully
+ eval {
+ $r = MARC::Record->new_from_xml( $xml );
+ 1;
+ } or do {
+ return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
+ };
+
+ if (!$r) {
+ return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
+ }
+
+ # From http://www.loc.gov/standards/sourcelist/subject.html
+ my $thes_code_map = {
+ a => 'lcsh',
+ b => 'lcshac',
+ c => 'mesh',
+ d => 'nal',
+ k => 'cash',
+ n => 'notapplicable',
+ r => 'aat',
+ s => 'sears',
+ v => 'rvm',
+ };
+
+ # Default to "No attempt to code" if the leader is horribly broken
+ my $fixed_field = $r->field('008');
+ my $thes_char = '|';
+ if ($fixed_field) {
+ $thes_char = substr($fixed_field->data(), 11, 1) || '|';
+ }
+
+ my $thes_code = 'UNDEFINED';
+
+ if ($thes_char eq 'z') {
+ # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html
+ $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
+ } elsif ($thes_code_map->{$thes_char}) {
+ $thes_code = $thes_code_map->{$thes_char};
+ }
+
+ my $auth_txt = '';
+ my $head = $r->field('1..');
+ if ($head) {
+ # Concatenate all of these subfields together, prefixed by their code
+ # to prevent collisions along the lines of "Fiction, North Carolina"
+ foreach my $sf ($head->subfields()) {
+ $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
+ }
+ }
+
+ # Perhaps better to parameterize the spi and pass as a parameter
+ $auth_txt =~ s/'//go;
+
+ if ($auth_txt) {
+ my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
+ my $norm_txt = $result->{rows}[0]->{norm_text};
+ return $head->tag() . "_" . $thes_code . " " . $norm_txt;
+ }
+
+ return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml);
+$func$ LANGUAGE 'plperlu' IMMUTABLE;
+
+COMMENT ON FUNCTION authority.normalize_heading( TEXT ) IS $$
+/**
+* Extract the authority heading, thesaurus, and NACO-normalized values
+* from an authority record. The primary purpose is to build a unique
+* index to defend against duplicated authority records from the same
+* thesaurus.
+*/
+$$;
ALTER TABLE config.org_unit_setting_type ADD CONSTRAINT view_perm_fkey FOREIGN KEY (view_perm) REFERENCES permission.perm_list (id) ON UPDATE CASCADE ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED;
ALTER TABLE config.org_unit_setting_type ADD CONSTRAINT update_perm_fkey FOREIGN KEY (update_perm) REFERENCES permission.perm_list (id) ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED;
+CREATE INDEX by_heading_and_thesaurus ON authority.record_entry (authority.normalize_heading(marc)) WHERE deleted IS FALSE or deleted = FALSE;
+
COMMIT;
--- /dev/null
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0400'); -- dbs
+
+CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
+ use strict;
+ use warnings;
+ use MARC::Record;
+ use MARC::File::XML (BinaryEncoding => 'UTF8');
+
+ my $xml = shift();
+ my $r = MARC::Record->new_from_xml( $xml );
+ return undef unless ($r);
+
+ # From http://www.loc.gov/standards/sourcelist/subject.html
+ my $thes_code_map = {
+ a => 'lcsh',
+ b => 'lcshac',
+ c => 'mesh',
+ d => 'nal',
+ k => 'cash',
+ n => 'notapplicable',
+ r => 'aat',
+ s => 'sears',
+ v => 'rvm',
+ };
+
+ # Default to "No attempt to code" if the leader is horribly broken
+ my $thes_char = substr($r->field('008')->data(), 11, 1) || '|';
+
+ my $thes_code = 'UNDEFINED';
+
+ if ($thes_char eq 'z') {
+ # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html
+ $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
+ } elsif ($thes_code_map->{$thes_char}) {
+ $thes_code = $thes_code_map->{$thes_char};
+ }
+
+ my $head = $r->field('1..');
+ my $auth_txt = '';
+ foreach my $sf ($head->subfields()) {
+ $auth_txt .= $sf->[1];
+ }
+
+
+ # Perhaps better to parameterize the spi and pass as a parameter
+ $auth_txt =~ s/'//go;
+ my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
+ my $norm_txt = $result->{rows}[0]->{norm_text};
+
+ return $head->tag() . "_" . $thes_code . " " . $norm_txt;
+$func$ LANGUAGE 'plperlu' IMMUTABLE;
+
+COMMENT ON FUNCTION authority.normalize_heading( TEXT ) IS $$
+/**
+* Extract the authority heading, thesaurus, and NACO-normalized values
+* from an authority record. The primary purpose is to build a unique
+* index to defend against duplicated authority records from the same
+* thesaurus.
+*/
+$$;
+
+COMMIT;
+
+-- Do this outside of a transaction to avoid failure if duplicate
+-- authority heading / thesaurus / heading text entries already
+-- exist in the database:
+CREATE UNIQUE INDEX unique_by_heading_and_thesaurus
+ ON authority.record_entry (authority.normalize_heading(marc))
+ WHERE deleted IS FALSE or deleted = FALSE
+;
+
+-- If the unique index fails, uncomment the following to create
+-- a regular index that will help find the duplicates in a hurry:
+--CREATE INDEX by_heading_and_thesaurus
+-- ON authority.record_entry (authority.normalize_heading(marc))
+-- WHERE deleted IS FALSE or deleted = FALSE
+--;
+
+-- Then find the duplicates like so to get an idea of how much
+-- pain you're looking at to clean things up:
+--SELECT id, authority.normalize_heading(marc)
+-- FROM authority.record_entry
+-- WHERE authority.normalize_heading(marc) IN (
+-- SELECT authority.normalize_heading(marc)
+-- FROM authority.record_entry
+-- GROUP BY authority.normalize_heading(marc)
+-- HAVING COUNT(*) > 1
+-- )
+--;
+
+-- Once you have removed the duplicates and the CREATE UNIQUE INDEX
+-- statement succeeds, drop the temporary index to avoid unnecessary
+-- duplication:
+-- DROP INDEX authority.by_heading_and_thesaurus;
--- /dev/null
+BEGIN;
+
+INSERT INTO config.upgrade_log (version) VALUES ('0401'); -- dbs
+
+DROP INDEX authority.authority_record_unique_tcn;
+ALTER TABLE authority.record_entry DROP COLUMN arn_value;
+ALTER TABLE authority.record_entry DROP COLUMN arn_source;
+
+COMMIT;
--- /dev/null
+BEGIN;
+
+-- Make the authority heading normalization routine more defensive
+-- Also drop back to a plain index for 2.0, we will get more restrictive over time
+
+INSERT INTO config.upgrade_log (version) VALUES ('0402'); -- dbs
+
+CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
+ use strict;
+ use warnings;
+
+ use utf8;
+ use MARC::Record;
+ use MARC::File::XML (BinaryEncoding => 'UTF8');
+ use UUID::Tiny ':std';
+
+ my $xml = shift() or return undef;
+
+ my $r;
+
+ # Prevent errors in XML parsing from blowing out ungracefully
+ eval {
+ $r = MARC::Record->new_from_xml( $xml );
+ 1;
+ } or do {
+ return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
+ };
+
+ if (!$r) {
+ return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
+ }
+
+ # From http://www.loc.gov/standards/sourcelist/subject.html
+ my $thes_code_map = {
+ a => 'lcsh',
+ b => 'lcshac',
+ c => 'mesh',
+ d => 'nal',
+ k => 'cash',
+ n => 'notapplicable',
+ r => 'aat',
+ s => 'sears',
+ v => 'rvm',
+ };
+
+ # Default to "No attempt to code" if the leader is horribly broken
+ my $fixed_field = $r->field('008');
+ my $thes_char = '|';
+ if ($fixed_field) {
+ $thes_char = substr($fixed_field->data(), 11, 1) || '|';
+ }
+
+ my $thes_code = 'UNDEFINED';
+
+ if ($thes_char eq 'z') {
+ # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html
+ $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
+ } elsif ($thes_code_map->{$thes_char}) {
+ $thes_code = $thes_code_map->{$thes_char};
+ }
+
+ my $auth_txt = '';
+ my $head = $r->field('1..');
+ if ($head) {
+ # Concatenate all of these subfields together, prefixed by their code
+ # to prevent collisions along the lines of "Fiction, North Carolina"
+ foreach my $sf ($head->subfields()) {
+ $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
+ }
+ }
+
+ # Perhaps better to parameterize the spi and pass as a parameter
+ $auth_txt =~ s/'//go;
+
+ if ($auth_txt) {
+ my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
+ my $norm_txt = $result->{rows}[0]->{norm_text};
+ return $head->tag() . "_" . $thes_code . " " . $norm_txt;
+ }
+
+ return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml);
+$func$ LANGUAGE 'plperlu' IMMUTABLE;
+
+DROP INDEX authority.unique_by_heading_and_thesaurus;
+
+CREATE INDEX by_heading_and_thesaurus
+ ON authority.record_entry (authority.normalize_heading(marc))
+ WHERE deleted IS FALSE or deleted = FALSE
+;
+
+COMMIT;
netscape.security.PrivilegeManager.enablePrivilege('UniversalXPConnect');
win = window.open('/xul/server/cat/marcedit.xul'); // XXX version?
- /* Ugly hack to satisfy arn_value and last_xact_id db schema reqs */
+ // Match marc2are.pl last_xact_id format, roughly
var now = new Date;
- var arn = 'AUTOGEN' + Date.parse(now);
+ var xact_id = 'IMPORT-' + Date.parse(now);
win.xulG = {
"record": {"marc": marcxml, "rtype": "are"},
"func": function(xmlString) {
var rec = new are();
rec.marc(xmlString);
- rec.arn_value(arn);
- rec.last_xact_id(arn);
+ rec.last_xact_id(xact_id);
rec.isnew(true);
pcrud.create(rec, {
"oncomplete": function () {