CONSTRAINT add_replace_strip_or_preserve CHECK ((preserve_spec IS NOT NULL OR replace_spec IS NOT NULL) OR (preserve_spec IS NULL AND replace_spec IS NULL))
);
- FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
- val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length );
- RETURN val;
- END LOOP;
+CREATE OR REPLACE FUNCTION vandelay.marc21_record_type( marc TEXT ) RETURNS config.marc21_rec_type_map AS $func$
+DECLARE
+ ldr TEXT;
+ tval TEXT;
+ tval_rec RECORD;
+ bval TEXT;
+ bval_rec RECORD;
+ retval config.marc21_rec_type_map%ROWTYPE;
+BEGIN
+ ldr := oils_xpath_string( '//*[local-name()="leader"]', marc );
+
+ IF ldr IS NULL OR ldr = '' THEN
+ SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS';
+ RETURN retval;
+ END IF;
+
+ SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same
+ SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same
+
+
+ tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length );
+ bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length );
+
+ -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr;
+
+ SELECT * INTO retval FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%';
+
+
+ IF retval.code IS NULL THEN
+ SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS';
+ END IF;
+
+ RETURN retval;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field( marc TEXT, ff TEXT ) RETURNS TEXT AS $func$
+DECLARE
+ rtype TEXT;
+ ff_pos RECORD;
+ tag_data RECORD;
+ val TEXT;
+BEGIN
+ rtype := (vandelay.marc21_record_type( marc )).code;
+ FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP
- FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(tag) || '"]/text()', marc ) ) x(value) LOOP
- output.ff_value := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length );
- IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF;
- RETURN NEXT output;
- output.ff_value := NULL;
- END LOOP;
-
++ IF ff_pos.tag = 'ldr' THEN
++ val := oils_xpath_string('//*[local-name()="leader"]', marc);
++ IF val IS NOT NULL THEN
++ val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length );
++ RETURN val;
++ END IF;
++ ELSE
++ FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
++ val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length );
++ RETURN val;
++ END LOOP;
++ END IF;
+ val := REPEAT( ff_pos.default_val, ff_pos.length );
+ RETURN val;
+ END LOOP;
+
+ RETURN NULL;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE TYPE biblio.record_ff_map AS (record BIGINT, ff_name TEXT, ff_value TEXT);
+CREATE OR REPLACE FUNCTION vandelay.marc21_extract_all_fixed_fields( marc TEXT ) RETURNS SETOF biblio.record_ff_map AS $func$
+DECLARE
+ tag_data TEXT;
+ rtype TEXT;
+ ff_pos RECORD;
+ output biblio.record_ff_map%ROWTYPE;
+BEGIN
+ rtype := (vandelay.marc21_record_type( marc )).code;
+
+ FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE rec_type = rtype ORDER BY tag DESC LOOP
+ output.ff_name := ff_pos.fixed_field;
+ output.ff_value := NULL;
+
++ IF ff_pos.tag = 'ldr' THEN
++ output.ff_value := oils_xpath_string('//*[local-name()="leader"]', marc);
++ IF output.ff_value IS NOT NULL THEN
++ output.ff_value := SUBSTRING( output.ff_value, ff_pos.start_pos + 1, ff_pos.length );
++ RETURN NEXT output;
++ output.ff_value := NULL;
++ END IF;
++ ELSE
++ FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP
++ output.ff_value := SUBSTRING( tag_data, ff_pos.start_pos + 1, ff_pos.length );
++ IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF;
++ RETURN NEXT output;
++ output.ff_value := NULL;
++ END LOOP;
++ END IF;
++
+ END LOOP;
+
+ RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE TYPE biblio.marc21_physical_characteristics AS ( id INT, record BIGINT, ptype TEXT, subfield INT, value INT );
+CREATE OR REPLACE FUNCTION vandelay.marc21_physical_characteristics( marc TEXT) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$
+DECLARE
+ rowid INT := 0;
+ _007 TEXT;
+ ptype config.marc21_physical_characteristic_type_map%ROWTYPE;
+ psf config.marc21_physical_characteristic_subfield_map%ROWTYPE;
+ pval config.marc21_physical_characteristic_value_map%ROWTYPE;
+ retval biblio.marc21_physical_characteristics%ROWTYPE;
+BEGIN
+
+ _007 := oils_xpath_string( '//*[@tag="007"]', marc );
+
+ IF _007 IS NOT NULL AND _007 <> '' THEN
+ SELECT * INTO ptype FROM config.marc21_physical_characteristic_type_map WHERE ptype_key = SUBSTRING( _007, 1, 1 );
+
+ IF ptype.ptype_key IS NOT NULL THEN
+ FOR psf IN SELECT * FROM config.marc21_physical_characteristic_subfield_map WHERE ptype_key = ptype.ptype_key LOOP
+ SELECT * INTO pval FROM config.marc21_physical_characteristic_value_map WHERE ptype_subfield = psf.id AND value = SUBSTRING( _007, psf.start_pos + 1, psf.length );
+
+ IF pval.id IS NOT NULL THEN
+ rowid := rowid + 1;
+ retval.id := rowid;
+ retval.ptype := ptype.ptype_key;
+ retval.subfield := psf.id;
+ retval.value := pval.id;
+ RETURN NEXT retval;
+ END IF;
+
+ END LOOP;
+ END IF;
+ END IF;
+
+ RETURN;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE TYPE vandelay.flat_marc AS ( tag CHAR(3), ind1 TEXT, ind2 TEXT, subfield TEXT, value TEXT );
+CREATE OR REPLACE FUNCTION vandelay.flay_marc ( TEXT ) RETURNS SETOF vandelay.flat_marc AS $func$
+
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+
+my $xml = shift;
+my $r = MARC::Record->new_from_xml( $xml );
+
+return_next( { tag => 'LDR', value => $r->leader } );
+
+for my $f ( $r->fields ) {
+ if ($f->is_control_field) {
+ return_next({ tag => $f->tag, value => $f->data });
+ } else {
+ for my $s ($f->subfields) {
+ return_next({
+ tag => $f->tag,
+ ind1 => $f->indicator(1),
+ ind2 => $f->indicator(2),
+ subfield => $s->[0],
+ value => $s->[1]
+ });
+
+ if ( $f->tag eq '245' and $s->[0] eq 'a' ) {
+ my $trim = $f->indicator(2) || 0;
+ return_next({
+ tag => 'tnf',
+ ind1 => $f->indicator(1),
+ ind2 => $f->indicator(2),
+ subfield => 'a',
+ value => substr( $s->[1], $trim )
+ });
+ }
+ }
+ }
+}
+
+return undef;
+
+$func$ LANGUAGE PLPERLU;
+
+CREATE OR REPLACE FUNCTION vandelay.flatten_marc ( marc TEXT ) RETURNS SETOF vandelay.flat_marc AS $func$
+DECLARE
+ output vandelay.flat_marc%ROWTYPE;
+ field RECORD;
+BEGIN
+ FOR field IN SELECT * FROM vandelay.flay_marc( marc ) LOOP
+ output.ind1 := field.ind1;
+ output.ind2 := field.ind2;
+ output.tag := field.tag;
+ output.subfield := field.subfield;
+ IF field.subfield IS NOT NULL AND field.tag NOT IN ('020','022','024') THEN -- exclude standard numbers and control fields
+ output.value := naco_normalize(field.value, field.subfield);
+ ELSE
+ output.value := field.value;
+ END IF;
+
+ CONTINUE WHEN output.value IS NULL;
+
+ RETURN NEXT output;
+ END LOOP;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.extract_rec_attrs ( xml TEXT, attr_defs TEXT[]) RETURNS hstore AS $_$
+DECLARE
+ transformed_xml TEXT;
+ prev_xfrm TEXT;
+ normalizer RECORD;
+ xfrm config.xml_transform%ROWTYPE;
+ attr_value TEXT;
+ new_attrs HSTORE := ''::HSTORE;
+ attr_def config.record_attr_definition%ROWTYPE;
+BEGIN
+
+ FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE name IN (SELECT * FROM UNNEST(attr_defs)) ORDER BY format LOOP
+
+ IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
+ SELECT ARRAY_TO_STRING(ARRAY_ACCUM(x.value), COALESCE(attr_def.joiner,' ')) INTO attr_value
+ FROM vandelay.flatten_marc(xml) AS x
+ WHERE x.tag LIKE attr_def.tag
+ AND CASE
+ WHEN attr_def.sf_list IS NOT NULL
+ THEN POSITION(x.subfield IN attr_def.sf_list) > 0
+ ELSE TRUE
+ END
+ GROUP BY x.tag
+ ORDER BY x.tag
+ LIMIT 1;
+
+ ELSIF attr_def.fixed_field IS NOT NULL THEN -- a named fixed field, see config.marc21_ff_pos_map.fixed_field
+ attr_value := vandelay.marc21_extract_fixed_field(xml, attr_def.fixed_field);
+
+ ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
+
+ SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
+
+ -- See if we can skip the XSLT ... it's expensive
+ IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
+ -- Can't skip the transform
+ IF xfrm.xslt <> '---' THEN
+ transformed_xml := oils_xslt_process(xml,xfrm.xslt);
+ ELSE
+ transformed_xml := xml;
+ END IF;
+
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ IF xfrm.name IS NULL THEN
+ -- just grab the marcxml (empty) transform
+ SELECT INTO xfrm * FROM config.xml_transform WHERE xslt = '---' LIMIT 1;
+ prev_xfrm := xfrm.name;
+ END IF;
+
+ attr_value := oils_xpath_string(attr_def.xpath, transformed_xml, COALESCE(attr_def.joiner,' '), ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]);
+
+ ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
+ SELECT value::TEXT INTO attr_value
+ FROM vandelay.marc21_physical_characteristics(xml)
+ WHERE subfield = attr_def.phys_char_sf
+ LIMIT 1; -- Just in case ...
+
+ END IF;
+
+ -- apply index normalizers to attr_value
+ FOR normalizer IN
+ SELECT n.func AS func,
+ n.param_count AS param_count,
+ m.params AS params
+ FROM config.index_normalizer n
+ JOIN config.record_attr_index_norm_map m ON (m.norm = n.id)
+ WHERE attr = attr_def.name
+ ORDER BY m.pos LOOP
+ EXECUTE 'SELECT ' || normalizer.func || '(' ||
+ quote_literal( attr_value ) ||
+ CASE
+ WHEN normalizer.param_count > 0
+ THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
+ ELSE ''
+ END ||
+ ')' INTO attr_value;
+
+ END LOOP;
+
+ -- Add the new value to the hstore
+ new_attrs := new_attrs || hstore( attr_def.name, attr_value );
+
+ END LOOP;
+
+ RETURN new_attrs;
+END;
+$_$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.extract_rec_attrs ( xml TEXT ) RETURNS hstore AS $_$
+ SELECT vandelay.extract_rec_attrs( $1, (SELECT ARRAY_ACCUM(name) FROM config.record_attr_definition));
+$_$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION vandelay.match_bib_record ( ) RETURNS TRIGGER AS $func$
+DECLARE
+ incoming_existing_id TEXT;
+ my_bib_queue vandelay.bib_queue%ROWTYPE;
+ my_match_set vandelay.match_set%ROWTYPE;
+ test vandelay.match_set_point%ROWTYPE;
+ potential_matches BIGINT[];
+ matches BIGINT[];
+ rvalue TEXT;
+ quality_set hstore;
+ tmp_rec BIGINT;
+ tmp_quality INT;
+ first_round BOOL;
+BEGIN
+ DELETE FROM vandelay.bib_match WHERE queued_record = NEW.id;
+
+ incoming_existing_id := oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]',NEW.marc);
+
+ IF incoming_existing_id IS NOT NULL THEN
+ SELECT id INTO tmp_rec FROM biblio.record_entry WHERE id = exact_id;
+ IF tmp_rec IS NOT NULL THEN
+ INSERT INTO vandelay.bib_match (queued_record, eg_record, quality) VALUES ( NEW.id, exact_id, 9999);
+ RETURN NEW;
+ END IF;
+ END IF;
+
+ SELECT * INTO my_bib_queue FROM vandelay.bib_queue WHERE id = NEW.queue;
+
+ first_round := TRUE;
+ -- whew ... here we go ...
+
+
+ -- Commented out until replaced by tree-ish version
+/*
+ FOR test IN SELECT * FROM vandelay.match_set_point WHERE match_set = my_bib_queue.match_set ORDER BY required DESC LOOP
+ IF test.tag IS NOT NULL THEN
+ FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml ) WHERE tag = test.tag AND subfield = test.subfield LOOP
+ SELECT ARRAY_ACCUM(DISTINCT record) INTO potential_matches FROM metabib.real_full_rec WHERE tag = test.tag AND subfield = test.subfield AND value = rvalue;
+
+ IF first_round THEN
+ matches := potential_matches;
+ first_round := FALSE;
+ ELSIF test.required THEN
+ FOR tmp_rec IN SELECT * FROM UNNEST(matches) LOOP
+ IF tmp_rec NOT IN (SELECT * FROM UNNEST(potential_matches)) THEN
+ matches := evergreen.array_remove_item_by_value(matches, tmp_rec);
+ potential_matches := evergreen.array_remove_item_by_value(potential_matches, tmp_rec);
+ END IF;
+ END LOOP;
+ END IF;
+
+ -- add the quality for this match
+ FOR tmp_rec IN SELECT * FROM UNNEST(potential_matches) LOOP
+ tmp_quality := COALESCE((quality_set -> tmp_rec::TEXT)::INT, 0);
+ quality_set := quality_set || hstore(tmp_rec::TEXT, (tmp_quality + test.quality)::TEXT);
+ END LOOP;
+
+ END LOOP;
+ ELSE
+ rvalue := vandelay.vandelay.extract_rec_attrs(xml, ARRAY[test.svf]);
+
+ IF first_round THEN
+ matches := potential_matches;
+ first_round := FALSE;
+ ELSIF test.required THEN
+ FOR tmp_rec IN SELECT * FROM UNNEST(matches) LOOP
+ IF tmp_rec NOT IN (SELECT * FROM UNNEST(potential_matches)) THEN
+ matches := evergreen.array_remove_item_by_value(matches, tmp_rec);
+ potential_matches := evergreen.array_remove_item_by_value(potential_matches, tmp_rec);
+ END IF;
+ END LOOP;
+ END IF;
+
+ -- add the quality for this match
+ FOR tmp_rec IN SELECT * FROM UNNEST(potential_matches) LOOP
+ tmp_quality := COALESCE((quality_set -> tmp_rec::TEXT)::INT, 0);
+ quality_set := quality_set || hstore(tmp_rec::TEXT, (tmp_quality + test.quality)::TEXT);
+ END LOOP;
+
+ END IF;
+ END LOOP;
+
+ FOR tmp_rec IN SELECT * FROM UNNEST(matches) LOOP
+ INSERT INTO vandelay.bib_match (matched_set, queued_record, eg_record, quality) VALUES (my_bib_queue.match_set, NEW.id, tmp_rec, (quality_set -> tmp_rec::TEXT));
+ END LOOP;
+*/
+
+ RETURN NEW;
+END;
+$func$ LANGUAGE PLPGSQL;
+
+CREATE OR REPLACE FUNCTION vandelay.incoming_record_quality ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$
+DECLARE
+ out_q INT := 0;
+ rvalue TEXT;
+ test vandelay.match_set_quality%ROWTYPE;
+BEGIN
+
+ FOR test IN SELECT * FROM vandelay.match_set_quality WHERE match_set = match_set_id LOOP
+ IF test.tag IS NOT NULL THEN
+ FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml ) WHERE tag = test.tag AND subfield = test.subfield LOOP
+ IF test.value = rvalue THEN
+ out_q := out_q + test.quality;
+ END IF;
+ END LOOP;
+ ELSE
+ IF test.value = vandelay.extract_rec_attrs(xml, ARRAY[test.svf]) THEN
+ out_q := out_q + test.quality;
+ END IF;
+ END IF;
+ END LOOP;
+
+ RETURN out_q;
+END;
+$_$ LANGUAGE PLPGSQL;
CREATE TYPE vandelay.tcn_data AS (tcn TEXT, tcn_source TEXT, used BOOL);
CREATE OR REPLACE FUNCTION vandelay.find_bib_tcn_data ( xml TEXT ) RETURNS SETOF vandelay.tcn_data AS $_$