CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
CREATE TABLE metabib.record_attr_vector_list (
- source BIGINT PRIMARY KEY REFERNECES biblio.record_entry (id),
+ source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
vlist INT[] NOT NULL -- stores id from ccvm AND murav
);
CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
);
-- Back-compat view ... we're moving to an INTARRAY world
-CREATE VIEW metabib.record_attr AS
- SELECT v.source AS id,
- hstore( ARRAY_AGG( ARRAY[ COALESCE(c.ctype,u.attr), COALESCE(c.value,u.value) ] ) )
+CREATE VIEW metabib.record_attr_flat AS
+ SELECT DISTINCT v.source AS id,
+ c.ctype AS attr,
+ c.code AS value
FROM metabib.record_attr_vector_list v
- LEFT JOIN metabib.uncontrolled_record_attr_value u ON ( u.id = ANY( v.vlist ) )
LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) )
- WHERE c.id IS NOT NULL OR u.id IS NOT NULL
- GROUP BY 1;
+ WHERE c.id IS NOT NULL
+ UNION ALL
+ SELECT DISTINCT v.source AS id,
+ u.attr,
+ u.value
+ FROM metabib.record_attr_vector_list v
+ LEFT JOIN metabib.uncontrolled_record_attr_value u ON ( u.id = ANY( v.vlist ) )
+ WHERE u.id IS NOT NULL;
+CREATE VIEW metabib.record_attr AS
+ SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs FROM metabib.record_attr_flat GROUP BY 1;
-- Back-back-compat view ... we use to live in an HSTORE world
CREATE TYPE metabib.rec_desc_type AS (
norm_attr_value TEXT[];
tmp_xml XML;
attr_def config.record_attr_definition%ROWTYPE;
- ccvm_row config.code_value_map%ROWTYPE;
+ ccvm_row config.coded_value_map%ROWTYPE;
BEGIN
IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE name = ANY( attr_list ) ORDER BY format LOOP
- attr_value := '{}'::TEXT[]
- norm_attr_value := '{}'::TEXT[]
- attr_vector_tmp := '{}'::INT[]
+ attr_value := '{}'::TEXT[];
+ norm_attr_value := '{}'::TEXT[];
+ attr_vector_tmp := '{}'::INT[];
- SELECT * INTO ccvm_row FROM config.code_value_map c WHERE c.ctype = attr_def.name;
+ SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
-- tag+sf attrs only support SVF
IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
WHEN attr_def.sf_list IS NOT NULL
THEN POSITION(subfield IN attr_def.sf_list) > 0
ELSE TRUE
- END
+ END
GROUP BY tag
ORDER BY tag
LIMIT 1;
ELSIF attr_def.xpath IS NOT NULL THEN -- and xpath expression
SELECT INTO xfrm * FROM config.xml_transform WHERE name = attr_def.format;
-
+
-- See if we can skip the XSLT ... it's expensive
IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
-- Can't skip the transform
END IF;
FOR tmp_xml IN SELECT XPATH(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
- attr_value := attr_value ||
- oils_xpath_string(
- '//*',
- tmp_xml::TEXT,
- COALESCE(attr_def.joiner,' '),
- ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
- );
- EXIT WHEN NOT attr_def.multi;
+ tmp_val := oils_xpath_string(
+ '//*',
+ tmp_xml::TEXT,
+ COALESCE(attr_def.joiner,' '),
+ ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+ );
+ IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+ attr_value := attr_value || tmp_val;
+ EXIT WHEN NOT attr_def.multi;
+ END IF;
END LOOP;
ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
- SELECT ARRAY_AGG(m.value) INTO attr_vlue
+ SELECT ARRAY_AGG(m.value) INTO attr_value
FROM vandelay.marc21_physical_characteristics(rmarc) v
LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
- WHERE v.subfield = attr_def.phys_char_sf
- AND ( ccvm.id IS NULL OR ( ccvm.id IS NOT NULL AND v.id IS NOT NULL) );
+ WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
+ AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
IF NOT attr_def.multi THEN
attr_value := ARRAY[attr_value[1]];
END IF;
- -- apply index normalizers to attr_value
- FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value);
+ -- apply index normalizers to attr_value
+ FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
FOR normalizer IN
SELECT n.func AS func,
n.param_count AS param_count,
WHERE attr = attr_def.name
ORDER BY m.pos LOOP
EXECUTE 'SELECT ' || normalizer.func || '(' ||
- COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
+ COALESCE( quote_literal( tmp_val ), 'NULL' ) ||
CASE
WHEN normalizer.param_count > 0
THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
ELSE ''
END ||
- ')' INTO tmp_val;
+ ')' INTO tmp_val;
END LOOP;
- norm_attr_value := norm_attr_value || tmp_val;
+ IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+ norm_attr_value := norm_attr_value || tmp_val;
+ END IF;
END LOOP;
-
+
IF attr_def.filter THEN
-- Create unknown uncontrolled values and find the IDs of the values
- IF ccvm.id IS NULL THEN
- FOR tmp_val FROM SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
- BEGIN; -- use subtransaction to isolate unique constraint violations
- INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
- EXCEPTION WHEN unique_violation THEN END;
+ IF ccvm_row.id IS NULL THEN
+ FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
+ IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+ BEGIN -- use subtransaction to isolate unique constraint violations
+ INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
+ EXCEPTION WHEN unique_violation THEN END;
+ END IF;
END LOOP;
-
+
SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
ELSE
- SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND value = ANY( norm_attr_value );
+ SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
END IF;
-- Add the new value to the vector
END LOOP;
- IF ARRAY_LENGTH(vlist, 1) > 0 THEN
+ IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
/* We may need to rewrite the vlist to contain
the intersection of new values for requested
attrs and old values for ignored attrs. To
END;
-$$ LANGUAGE PLPGSQL;
+$func$ LANGUAGE PLPGSQL;
-- AFTER UPDATE OR INSERT trigger for biblio.record_entry
CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
BEGIN
- IF NEW.deleted IS TRUE THEN -- If this bib is deleted
+ IF NEW.deleted THEN -- If this bib is deleted
PERFORM * FROM config.internal_flag WHERE
name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
IF NOT FOUND THEN
$p$ LANGUAGE PLPGSQL;
COMMIT;
+
CREATE EXTENSION intarray;
-ALTER TABLE config.record_attr_definition ADD COLUMN multi NOT NULL DEFAULT TRUE;
+ALTER TABLE config.record_attr_definition ADD COLUMN multi BOOL NOT NULL DEFAULT TRUE;
UPDATE config.record_attr_definition
SET multi = FALSE
CREATE UNIQUE INDEX muv_once_idx ON metabib.uncontrolled_record_attr_value (attr,value);
CREATE TABLE metabib.record_attr_vector_list (
- source BIGINT PRIMARY KEY REFERNECES biblio.record_entry (id),
+ source BIGINT PRIMARY KEY REFERENCES biblio.record_entry (id),
vlist INT[] NOT NULL -- stores id from ccvm AND murav
);
CREATE INDEX mrca_vlist_idx ON metabib.record_attr_vector_list USING gin ( vlist gin__int_ops );
norm_attr_value TEXT[];
tmp_xml XML;
attr_def config.record_attr_definition%ROWTYPE;
- ccvm_row config.code_value_map%ROWTYPE;
+ ccvm_row config.coded_value_map%ROWTYPE;
BEGIN
IF attr_list IS NULL OR rdeleted THEN -- need to do the full dance on INSERT or undelete
FOR attr_def IN SELECT * FROM config.record_attr_definition WHERE name = ANY( attr_list ) ORDER BY format LOOP
- attr_value := '{}'::TEXT[]
- norm_attr_value := '{}'::TEXT[]
- attr_vector_tmp := '{}'::INT[]
+ attr_value := '{}'::TEXT[];
+ norm_attr_value := '{}'::TEXT[];
+ attr_vector_tmp := '{}'::INT[];
- SELECT * INTO ccvm_row FROM config.code_value_map c WHERE c.ctype = attr_def.name;
+ SELECT * INTO ccvm_row FROM config.coded_value_map c WHERE c.ctype = attr_def.name LIMIT 1;
-- tag+sf attrs only support SVF
IF attr_def.tag IS NOT NULL THEN -- tag (and optional subfield list) selection
END IF;
FOR tmp_xml IN SELECT XPATH(attr_def.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]) LOOP
- attr_value := attr_value ||
- oils_xpath_string(
- '//*',
- tmp_xml::TEXT,
- COALESCE(attr_def.joiner,' '),
- ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
- );
- EXIT WHEN NOT attr_def.multi;
+ tmp_val := oils_xpath_string(
+ '//*',
+ tmp_xml::TEXT,
+ COALESCE(attr_def.joiner,' '),
+ ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
+ );
+ IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+ attr_value := attr_value || tmp_val;
+ EXIT WHEN NOT attr_def.multi;
+ END IF;
END LOOP;
ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
- SELECT ARRAY_AGG(m.value) INTO attr_vlue
+ SELECT ARRAY_AGG(m.value) INTO attr_value
FROM vandelay.marc21_physical_characteristics(rmarc) v
LEFT JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
- WHERE v.subfield = attr_def.phys_char_sf
- AND ( ccvm.id IS NULL OR ( ccvm.id IS NOT NULL AND v.id IS NOT NULL) );
+ WHERE v.subfield = attr_def.phys_char_sf AND (m.value IS NOT NULL AND BTRIM(m.value) <> '')
+ AND ( ccvm_row.id IS NULL OR ( ccvm_row.id IS NOT NULL AND v.id IS NOT NULL) );
IF NOT attr_def.multi THEN
attr_value := ARRAY[attr_value[1]];
END IF;
-- apply index normalizers to attr_value
- FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value);
+ FOR tmp_val IN SELECT value FROM UNNEST(attr_value) x(value) LOOP
FOR normalizer IN
SELECT n.func AS func,
n.param_count AS param_count,
')' INTO tmp_val;
END LOOP;
- norm_attr_value := norm_attr_value || tmp_val;
+ IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+ norm_attr_value := norm_attr_value || tmp_val;
+ END IF;
END LOOP;
IF attr_def.filter THEN
-- Create unknown uncontrolled values and find the IDs of the values
- IF ccvm.id IS NULL THEN
- FOR tmp_val FROM SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
- BEGIN; -- use subtransaction to isolate unique constraint violations
- INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
- EXCEPTION WHEN unique_violation THEN END;
+ IF ccvm_row.id IS NULL THEN
+ FOR tmp_val IN SELECT value FROM UNNEST(norm_attr_value) x(value) LOOP
+ IF tmp_val IS NOT NULL AND BTRIM(tmp_val) <> '' THEN
+ BEGIN -- use subtransaction to isolate unique constraint violations
+ INSERT INTO metabib.uncontrolled_record_attr_value ( attr, value ) VALUES ( attr_def.name, tmp_val );
+ EXCEPTION WHEN unique_violation THEN END;
+ END IF;
END LOOP;
SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM metabib.uncontrolled_record_attr_value WHERE attr = attr_def.name AND value = ANY( norm_attr_value );
ELSE
- SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND value = ANY( norm_attr_value );
+ SELECT ARRAY_AGG(id) INTO attr_vector_tmp FROM config.coded_value_map WHERE ctype = attr_def.name AND code = ANY( norm_attr_value );
END IF;
-- Add the new value to the vector
END LOOP;
- IF ARRAY_LENGTH(vlist, 1) > 0 THEN
+ IF ARRAY_LENGTH(attr_vector, 1) > 0 THEN
/* We may need to rewrite the vlist to contain
the intersection of new values for requested
attrs and old values for ignored attrs. To
IF rdeleted THEN -- initial insert OR revivication
DELETE FROM metabib.record_attr_vector_list WHERE source = rid;
INSERT INTO metabib.record_attr_vector_list (source, vlist) VALUES (rid, attr_vector);
- ELSE
+ ELSE
UPDATE metabib.record_attr_vector_list SET vlist = attr_vector WHERE source = rid;
END IF;
END IF;
END;
-$$ LANGUAGE PLPGSQL;
+$func$ LANGUAGE PLPGSQL;
-- AFTER UPDATE OR INSERT trigger for biblio.record_entry
CREATE OR REPLACE FUNCTION biblio.indexing_ingest_or_delete () RETURNS TRIGGER AS $func$
BEGIN
- IF NEW.deleted IS TRUE THEN -- If this bib is deleted
+ IF NEW.deleted THEN -- If this bib is deleted
PERFORM * FROM config.internal_flag WHERE
name = 'ingest.metarecord_mapping.preserve_on_delete' AND enabled;
IF NOT FOUND THEN
END;
$func$ LANGUAGE PLPGSQL;
-CREATE TEMP UNLOGGED TABLE attr_set ON COMMIT DROP AS SELECT DISTINCT source, (each(attrs)).key,(each(attrs)).value FROM metabib.record_attr;
+CREATE TEMP TABLE attr_set ON COMMIT DROP AS SELECT DISTINCT id AS source, (each(attrs)).key,(each(attrs)).value FROM metabib.record_attr;
+UPDATE attr_set SET source = NULL WHERE BTRIM(value) = '';
-- Grab sort values for the new sorting mechanism
INSERT INTO metabib.record_sorter (source,attr,value)
SELECT a.source, a.key, a.value
FROM attr_set a
- JOIN config.record_attr_defintion d ON (d.name = a.key AND d.sorter);
+ JOIN config.record_attr_definition d ON (d.name = a.key AND d.sorter AND a.value IS NOT NULL);
-- Rewrite uncontrolled SVF record attrs as the seeds of an intarray vector
INSERT INTO metabib.uncontrolled_record_attr_value (attr,value)
SELECT DISTINCT a.key, a.value
FROM attr_set a
- JOIN config.record_attr_defintion d ON (d.name = a.key AND d.filter);
+ JOIN config.record_attr_definition d ON (d.name = a.key AND d.filter AND a.value IS NOT NULL)
LEFT JOIN config.coded_value_map m ON (m.ctype = a.key)
WHERE m.id IS NULL;
-- Now construct the record-specific vector from the SVF data
INSERT INTO metabib.record_attr_vector_list (source,vlist)
- SELECT a.source, ARRAY_AGG(COALESCE(u.id, c.id))
+ SELECT a.id, ARRAY_AGG(COALESCE(u.id, c.id))
FROM metabib.record_attr a
- JOIN attr_set USING (source)
- LEFT JOIN metabib.uncontrolled_record_attr_value u ON (u.attr = attr_set.key)
- LEFT JOIN config.coded_value_map c ON (c.ctype = attr_set.key)
+ JOIN attr_set ON (a.id = attr_set.source)
+ LEFT JOIN metabib.uncontrolled_record_attr_value u ON (u.attr = attr_set.key AND u.value = attr_set.value)
+ LEFT JOIN config.coded_value_map c ON (c.ctype = attr_set.key AND c.code = attr_set.value)
WHERE COALESCE(u.id,c.id) IS NOT NULL
GROUP BY 1;
+DROP VIEW metabib.rec_descriptor;
DROP TABLE metabib.record_attr;
-CREATE TYPE metbib.record_attr_type AS (
+CREATE TYPE metabib.record_attr_type AS (
id BIGINT,
attrs HSTORE
);
-- Back-compat view ... we're moving to an INTARRAY world
-CREATE VIEW metabib.record_attr AS
- SELECT v.source AS id,
- hstore( ARRAY_AGG( ARRAY[ COALESCE(c.ctype,u.attr), COALESCE(c.value,u.value) ] ) )
+CREATE VIEW metabib.record_attr_flat AS
+ SELECT DISTINCT v.source AS id,
+ c.ctype AS attr,
+ c.code AS value
FROM metabib.record_attr_vector_list v
- LEFT JOIN metabib.uncontrolled_record_attr_value u ON ( u.id = ANY( v.vlist ) )
LEFT JOIN config.coded_value_map c ON ( c.id = ANY( v.vlist ) )
- WHERE c.id IS NOT NULL OR u.id IS NOT NULL
- GROUP BY 1;
+ WHERE c.id IS NOT NULL
+ UNION ALL
+ SELECT DISTINCT v.source AS id,
+ u.attr,
+ u.value
+ FROM metabib.record_attr_vector_list v
+ LEFT JOIN metabib.uncontrolled_record_attr_value u ON ( u.id = ANY( v.vlist ) )
+ WHERE u.id IS NOT NULL;
+
+CREATE VIEW metabib.record_attr AS
+ SELECT id, HSTORE( ARRAY_AGG( attr ), ARRAY_AGG( value ) ) AS attrs FROM metabib.record_attr_flat GROUP BY 1;
+
+CREATE VIEW metabib.rec_descriptor AS
+ SELECT id,
+ id AS record,
+ (populate_record(NULL::metabib.rec_desc_type, attrs)).*
+ FROM metabib.record_attr;
COMMIT;