From: Dan Wells Date: Thu, 10 Oct 2013 21:15:12 +0000 (-0400) Subject: Stamping 0844: better MODS for browse, etc. X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=1ef503bc1a34e60bbe98663292c36b9348aee15e;p=evergreen%2Fpines.git Stamping 0844: better MODS for browse, etc. Signed-off-by: Dan Wells --- diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index c80854e933..b8cbf460e9 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0843', :eg_version); -- dbwells/rsteed +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0844', :eg_version); -- dbwells/senator CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0844.data.better_mods_for_browse_etc.sql b/Open-ILS/src/sql/Pg/upgrade/0844.data.better_mods_for_browse_etc.sql new file mode 100644 index 0000000000..d75969c5ee --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0844.data.better_mods_for_browse_etc.sql @@ -0,0 +1,3623 @@ +BEGIN; + +-- check whether patch can be applied +SELECT evergreen.upgrade_deps_block_check('0844', :eg_version); + +-- 953.data.MODS32-xsl.sql +UPDATE config.xml_transform SET xslt=$$ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BK + SE + + + BK + MM + CF + MP + VM + MU + + + + + + + + + b + afgk + + + + + abfgk + + + + + + + + + + + + + + + + + + <xsl:value-of select="substring($titleChop,@ind2+1)"/> + + + + + <xsl:value-of select="$titleChop"/> + + + + + + + + + b + b + afgk + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="subfieldSelect"> + <xsl:with-param 
name="codes">abfgk</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + abfgk + + + + + + + + + + + <xsl:value-of select="substring($titleBrowseChop,@ind2+1)"/> + + + + + <xsl:value-of select="$titleBrowseChop"/> + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="subfieldSelect"> + <xsl:with-param name="codes">a</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + + a + + + + + + + + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + + + + + + + + <xsl:value-of select="substring($titleChop,@ind2+1)"/> + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="subfieldSelect"> + <!-- 1/04 removed $h, $b --> + <xsl:with-param name="codes">af</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:value-of select="$titleChop"/> + + + + + + + + + + + <xsl:value-of select="substring($titleChop,$nfi+1)"/> + + + + + <xsl:value-of select="$titleChop"/> + + + + + + + + + + + + ah + + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + <xsl:value-of select="substring($titleChop,@ind1+1)"/> + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + + + creator + + + + + + + + + + creator + + + + + + + + + + creator + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + personal + + + + + + + + + + + yes + + + yes + + + text + cartographic + notated music + sound recording-nonmusical + sound recording-musical + still image + moving image + three dimensional object + software, multimedia + mixed material + + + + globe + + + remote sensing image + + + + + + map + + + atlas + + + + + + + + database + + + loose-leaf + + + series + + 
+ newspaper + + + periodical + + + web site + + + + + + + + abstract or summary + + + bibliography + + + catalog + + + dictionary + + + encyclopedia + + + handbook + + + legal article + + + index + + + discography + + + legislation + + + theses + + + survey of literature + + + review + + + programmed text + + + filmography + + + directory + + + statistics + + + technical report + + + legal case and case notes + + + law report or digest + + + treaty + + + + + + conference publication + + + + + + + + numeric data + + + database + + + font + + + game + + + + + + patent + + + festschrift + + + + biography + + + + + essay + + + drama + + + comic strip + + + fiction + + + humor, satire + + + letter + + + novel + + + short story + + + speech + + + + + + + biography + + + conference publication + + + drama + + + essay + + + fiction + + + folktale + + + history + + + humor, satire + + + memoir + + + poetry + + + rehearsal + + + reporting + + + sound + + + speech + + + + + + + art original + + + kit + + + art reproduction + + + diorama + + + filmstrip + + + legal article + + + picture + + + graphic + + + technical drawing + + + motion picture + + + chart + + + flash card + + + microscope slide + + + model + + + realia + + + slide + + + transparency + + + videorecording + + + toy + + + + + + + + + + abvxyz + - + + + + + + + + + code + marccountry + + + + + + + + code + iso3166 + + + + + + + + text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + :,;/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + monographic + continuing + + + + + + + ab + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + reformatted digital + + + digitized microfilm + + + digitized other analog + + + + + + + + + + + + + + + +
braille
+
+ +
print
+
+ +
electronic
+
+ +
microfiche
+
+ +
microfilm
+
+
+ + +
+ + + + + +
+
+ +
+ + + + + +
+
+ +
+ + + + + +
+
+ +
+ + + + + +
+
+ +
+ + + + + +
+
+ +
+ + + + + +
+
+ +
+ +
+
+ + + + access + + + preservation + + + replacement + + + + + +
chip cartridge
+
+ +
computer optical disc cartridge
+
+ +
magnetic disc
+
+ +
magneto-optical disc
+
+ +
optical disc
+
+ +
remote
+
+ +
tape cartridge
+
+ +
tape cassette
+
+ +
tape reel
+
+ + +
celestial globe
+
+ +
earth moon globe
+
+ +
planetary or lunar globe
+
+ +
terrestrial globe
+
+ + +
kit
+
+ + +
atlas
+
+ +
diagram
+
+ +
map
+
+ +
model
+
+ +
profile
+
+ +
remote-sensing image
+
+ +
section
+
+ +
view
+
+ + +
aperture card
+
+ +
microfiche
+
+ +
microfiche cassette
+
+ +
microfilm cartridge
+
+ +
microfilm cassette
+
+ +
microfilm reel
+
+ +
microopaque
+
+ + +
film cartridge
+
+ +
film cassette
+
+ +
film reel
+
+ + +
chart
+
+ +
collage
+
+ +
drawing
+
+ +
flash card
+
+ +
painting
+
+ +
photomechanical print
+
+ +
photonegative
+
+ +
photoprint
+
+ +
picture
+
+ +
print
+
+ +
technical drawing
+
+ + +
notated music
+
+ + +
filmslip
+
+ +
filmstrip cartridge
+
+ +
filmstrip roll
+
+ +
other filmstrip type
+
+ +
slide
+
+ +
transparency
+
+ +
remote-sensing image
+
+ +
cylinder
+
+ +
roll
+
+ +
sound cartridge
+
+ +
sound cassette
+
+ +
sound disc
+
+ +
sound-tape reel
+
+ +
sound-track film
+
+ +
wire recording
+
+ + +
braille
+
+ +
combination
+
+ +
moon
+
+ +
tactile, with no writing system
+
+ + +
braille
+
+ +
large print
+
+ +
regular print
+
+ +
text in looseleaf binder
+
+ + +
videocartridge
+
+ +
videocassette
+
+ +
videodisc
+
+ +
videoreel
+
+ + + + + + + + + + abce + + + +
+ + + + + + + + + + ab + + + + + + + + agrt + + + + + + + ab + + + + + + + + + adolescent + + + adult + + + general + + + juvenile + + + preschool + + + specialized + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + defg + + + + + + + + + + + + marcgac + + + + + + iso3166 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ab + + + + + + + abx + + + + + + + ab + + + + + + + + + + + + + + + + + + + + + + + + + + + + ab + + + + + + + + + + av + + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + <xsl:value-of select="substring($titleChop,@ind2+1)"/> + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="subfieldSelect"> + <xsl:with-param name="codes">av</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + abcx3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="specialSubfieldSelect"> + <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> + <xsl:with-param name="axis">t</xsl:with-param> + <xsl:with-param name="afterCodes">g</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + aq + t + g + + + + + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="specialSubfieldSelect"> + 
<xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> + <xsl:with-param name="axis">t</xsl:with-param> + <xsl:with-param name="afterCodes">dg</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + + + + + + + + c + t + dgn + + + + + + + + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="specialSubfieldSelect"> + <xsl:with-param name="anyCodes">tfklsv</xsl:with-param> + <xsl:with-param name="axis">t</xsl:with-param> + <xsl:with-param name="afterCodes">g</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + aqdc + t + gn + + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="subfieldSelect"> + <xsl:with-param name="codes">adfgklmorsv</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + + + + + + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + <xsl:value-of select="substring($titleChop,@ind1+1)"/> + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="specialSubfieldSelect"> + <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> + <xsl:with-param name="axis">t</xsl:with-param> + <xsl:with-param name="afterCodes">g</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + aq + t + g + + + + + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="specialSubfieldSelect"> + <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> + <xsl:with-param name="axis">t</xsl:with-param> + <xsl:with-param 
name="afterCodes">dg</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + + + + + + + + c + t + dgn + + + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:call-template name="specialSubfieldSelect"> + <xsl:with-param name="anyCodes">tfklsv</xsl:with-param> + <xsl:with-param name="axis">t</xsl:with-param> + <xsl:with-param name="afterCodes">g</xsl:with-param> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + + + + + + + aqdc + t + gn + + + + + + + + + + + + + + adfgklmorsv + + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + <xsl:value-of select="substring($titleChop,@ind2+1)"/> + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + + + + + + + + isbn + + + + + + + + + + isrc + + + + + + + + + + ismn + + + + + + + + + + sici + + + + ab + + + + + + issn + + + + + + + + lccn + + + + + + + + + + issue number + matrix number + music plate + music publisher + videorecording identifier + + + + + + + ba + ab + + + + + + + + + + ab + + + + + + + + doi + hdl + uri + + + + + + + + + + + + + + + + + y3z + + + + + + + + + + + + + + + + + + + + + y3 + + + + + + + z + + + + + + + + + + + + + + + + + + abje + + + + + + + + abcd35 + + + + + + + abcde35 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + n + n + fgkdlmor + + + + + p + p + fgkdlmor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + g + g + pst + + + + + p + p + fgkdlmor + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cdn + + + + + + + + + + aq + + + + :,;/ + + + + + + + + + + acdeq + + + + + + constituent + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:value-of select="."></xsl:value-of> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:value-of select="."></xsl:value-of> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:value-of select="."></xsl:value-of> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + <xsl:call-template name="chopPunctuation"> + <xsl:with-param name="chopString"> + <xsl:value-of select="."></xsl:value-of> + </xsl:with-param> + </xsl:call-template> + + + + + + + + + + + + + + + code + marcgac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lcsh + lcshac + mesh + + nal + csh + rvm + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + aq + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + cdnp + + + + + + + + + + + + + + + + abcdeqnp + + + + + + + + + + + + + + + + + + + + + adfhklor + + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + <xsl:value-of select="substring($titleChop,@ind1+1)"/> + + + + + + <xsl:value-of select="$titleChop" /> + + + + + + + + + + + + + + + + + abcd + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + bc + + + + + + + + + + + + + + + + + + + + + + + + + + + yes + + + + + + + + + + + + + + + + + + + + + + + + + + + Arabic + Latin + Chinese, Japanese, Korean + Cyrillic + Hebrew + 
Greek + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + summary or subtitle + sung or spoken text + libretto + table of contents + accompanying material + translation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + summary or subtitle + sung or spoken text + libretto + table of contents + accompanying material + translation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .:,;/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
$$ WHERE name = 'mods32'; + +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + idx config.metabib_field%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + prev_xfrm TEXT; + transformed_xml TEXT; + xml_node TEXT; + xml_node_list TEXT[]; + facet_text TEXT; + browse_text TEXT; + sort_value TEXT; + raw_text TEXT; + curr_text TEXT; + joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? + authority_text TEXT; + authority_link BIGINT; + output_row metabib.field_entry_template%ROWTYPE; +BEGIN + + -- Start out with no field-use bools set + output_row.browse_field = FALSE; + output_row.facet_field = FALSE; + output_row.search_field = FALSE; + + -- Get the record + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + -- Loop over the indexing entries + FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP + + joiner := COALESCE(idx.joiner, default_joiner); + + SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); + ELSE + transformed_xml := bib.marc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + + raw_text := NULL; + FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP + CONTINUE WHEN xml_node !~ E'^\\s*<'; + + -- XXX much of this should be moved into oils_xpath_string... + curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value( + oils_xpath( '//text()', + REGEXP_REPLACE( + REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". 
Data ise returned from oils_xpath (above) in UTF-8, not entity encoded + REGEXP_REPLACE( -- This escapes embeded [^<]+)(<)([^>]+<)$re$, + E'\\1<\\3', + 'g' + ), + '&(?!amp;)', + '&', + 'g' + ), + E'\\s+', + ' ', + 'g' + ) + ), ' '), ''), + joiner + ); + + CONTINUE WHEN curr_text IS NULL OR curr_text = ''; + + IF raw_text IS NOT NULL THEN + raw_text := raw_text || joiner; + END IF; + + raw_text := COALESCE(raw_text,'') || curr_text; + + -- autosuggest/metabib.browse_entry + IF idx.browse_field THEN + + IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN + browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + browse_text := curr_text; + END IF; + + IF idx.browse_sort_xpath IS NOT NULL AND + idx.browse_sort_xpath <> '' THEN + + sort_value := oils_xpath_string( + idx.browse_sort_xpath, xml_node, joiner, + ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] + ); + ELSE + sort_value := browse_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + output_row.sort_value := + public.naco_normalize(sort_value); + + output_row.authority := NULL; + + IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN + authority_text := oils_xpath_string( + idx.authority_xpath, xml_node, joiner, + ARRAY[ + ARRAY[xfrm.prefix, xfrm.namespace_uri], + ARRAY['xlink','http://www.w3.org/1999/xlink'] + ] + ); + + IF authority_text ~ '^\d+$' THEN + authority_link := authority_text::BIGINT; + PERFORM * FROM authority.record_entry WHERE id = authority_link; + IF FOUND THEN + output_row.authority := authority_link; + END IF; + END IF; + + END IF; + + output_row.browse_field = TRUE; + -- Returning browse rows with search_field = true for search+browse + -- configs allows us to retain granularity of being able to search + -- browse fields with "starts with" type operators 
(for example, for + -- titles of songs in music albums) + IF idx.search_field THEN + output_row.search_field = TRUE; + END IF; + RETURN NEXT output_row; + output_row.browse_field = FALSE; + output_row.search_field = FALSE; + output_row.sort_value := NULL; + END IF; + + -- insert raw node text for faceting + IF idx.facet_field THEN + + IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN + facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + facet_text := curr_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g')); + + output_row.facet_field = TRUE; + RETURN NEXT output_row; + output_row.facet_field = FALSE; + END IF; + + END LOOP; + + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; + + -- insert combined node text for searching + IF idx.search_field THEN + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + + output_row.search_field = TRUE; + RETURN NEXT output_row; + output_row.search_field = FALSE; + END IF; + + END LOOP; + +END; + +$func$ LANGUAGE PLPGSQL; + + +CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ +DECLARE + fclass RECORD; + ind_data metabib.field_entry_template%ROWTYPE; + mbe_row metabib.browse_entry%ROWTYPE; + mbe_id BIGINT; + b_skip_facet BOOL; + b_skip_browse BOOL; + b_skip_search BOOL; + value_prepped TEXT; +BEGIN + + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; + SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM 
config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; + SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; + IF NOT FOUND THEN + IF NOT b_skip_search THEN + FOR fclass IN SELECT * FROM config.metabib_class LOOP + -- RAISE NOTICE 'Emptying out %', fclass.name; + EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; + END LOOP; + END IF; + IF NOT b_skip_facet THEN + DELETE FROM metabib.facet_entry WHERE source = bib_id; + END IF; + IF NOT b_skip_browse THEN + DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; + END IF; + END IF; + + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + IF ind_data.field < 0 THEN + ind_data.field = -1 * ind_data.field; + END IF; + + IF ind_data.facet_field AND NOT b_skip_facet THEN + INSERT INTO metabib.facet_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + IF ind_data.browse_field AND NOT b_skip_browse THEN + -- A caveat about this SELECT: this should take care of replacing + -- old mbe rows when data changes, but not if normalization (by + -- which I mean specifically the output of + -- evergreen.oils_tsearch2()) changes. It may or may not be + -- expensive to add a comparison of index_vector to index_vector + -- to the WHERE clause below. 
+ + value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); + SELECT INTO mbe_row * FROM metabib.browse_entry + WHERE value = value_prepped AND sort_value = ind_data.sort_value; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_entry + ( value, sort_value ) VALUES + ( value_prepped, ind_data.sort_value ); + + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + END IF; + + IF ind_data.search_field AND NOT b_skip_search THEN + -- Avoid inserting duplicate rows + EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class || + '_field_entry WHERE field = $1 AND source = $2 AND value = $3' + INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value; + -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id; + IF mbe_id IS NULL THEN + EXECUTE $$ + INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) + VALUES ($$ || + quote_literal(ind_data.field) || $$, $$ || + quote_literal(ind_data.source) || $$, $$ || + quote_literal(ind_data.value) || + $$);$$; + END IF; + END IF; + + END LOOP; + + IF NOT b_skip_search THEN + PERFORM metabib.update_combined_index_vectors(bib_id); + END IF; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +-- Don't use Title Proper search field as the browse field +UPDATE config.metabib_field SET browse_field = FALSE, browse_xpath = NULL, browse_sort_xpath = NULL WHERE id = 6; + +-- Create a new Title Proper browse config +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, search_field, authority_xpath, browse_field, browse_sort_xpath ) VALUES + (31, 'title', 'browse', oils_i18n_gettext(31, 'Title Proper (Browse)', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleBrowse$$, FALSE, '//@xlink:href', TRUE, $$*[local-name() != "nonSort"]$$ ); + +COMMIT; + 
+\qecho This is a browse-only reingest of your bib records. It may take a while. +\qecho You may cancel now without losing the effect of the rest of the +\qecho upgrade script, and arrange the reingest later. +\qecho . +SELECT metabib.reingest_metabib_field_entries(id, TRUE, FALSE, TRUE) + FROM biblio.record_entry; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.better_mods_for_browse_etc.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.data.better_mods_for_browse_etc.sql deleted file mode 100644 index e8a070f25a..0000000000 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.data.better_mods_for_browse_etc.sql +++ /dev/null @@ -1,3623 +0,0 @@ -BEGIN; - --- check whether patch can be applied ---SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); - --- 953.data.MODS32-xsl.sql -UPDATE config.xml_transform SET xslt=$$ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - BK - SE - - - BK - MM - CF - MP - VM - MU - - - - - - - - - b - afgk - - - - - abfgk - - - - - - - - - - - - - - - - - - <xsl:value-of select="substring($titleChop,@ind2+1)"/> - - - - - <xsl:value-of select="$titleChop"/> - - - - - - - - - b - b - afgk - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="subfieldSelect"> - <xsl:with-param name="codes">abfgk</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - abfgk - - - - - - - - - - - <xsl:value-of select="substring($titleBrowseChop,@ind2+1)"/> - - - - - <xsl:value-of select="$titleBrowseChop"/> - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="subfieldSelect"> - <xsl:with-param name="codes">a</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - - a - - - - - - - - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - - - - - - - - <xsl:value-of 
select="substring($titleChop,@ind2+1)"/> - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="subfieldSelect"> - <!-- 1/04 removed $h, $b --> - <xsl:with-param name="codes">af</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <xsl:value-of select="$titleChop"/> - - - - - - - - - - - <xsl:value-of select="substring($titleChop,$nfi+1)"/> - - - - - <xsl:value-of select="$titleChop"/> - - - - - - - - - - - - ah - - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - <xsl:value-of select="substring($titleChop,@ind1+1)"/> - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - - - creator - - - - - - - - - - creator - - - - - - - - - - creator - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - personal - - - - - - - - - - - yes - - - yes - - - text - cartographic - notated music - sound recording-nonmusical - sound recording-musical - still image - moving image - three dimensional object - software, multimedia - mixed material - - - - globe - - - remote sensing image - - - - - - map - - - atlas - - - - - - - - database - - - loose-leaf - - - series - - - newspaper - - - periodical - - - web site - - - - - - - - abstract or summary - - - bibliography - - - catalog - - - dictionary - - - encyclopedia - - - handbook - - - legal article - - - index - - - discography - - - legislation - - - theses - - - survey of literature - - - review - - - programmed text - - - filmography - - - directory - - - statistics - - - technical report - - - legal case and case notes - - - law report or digest - - - treaty - - - - - - conference publication - - - - - - - - numeric data - - - database - - - font - - - game - - - - - - patent - - - festschrift - - - - biography - - - - - essay - - - drama - - - comic strip - - 
- fiction - - - humor, satire - - - letter - - - novel - - - short story - - - speech - - - - - - - biography - - - conference publication - - - drama - - - essay - - - fiction - - - folktale - - - history - - - humor, satire - - - memoir - - - poetry - - - rehearsal - - - reporting - - - sound - - - speech - - - - - - - art original - - - kit - - - art reproduction - - - diorama - - - filmstrip - - - legal article - - - picture - - - graphic - - - technical drawing - - - motion picture - - - chart - - - flash card - - - microscope slide - - - model - - - realia - - - slide - - - transparency - - - videorecording - - - toy - - - - - - - - - - abvxyz - - - - - - - - - - - code - marccountry - - - - - - - - code - iso3166 - - - - - - - - text - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - :,;/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - monographic - continuing - - - - - - - ab - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - reformatted digital - - - digitized microfilm - - - digitized other analog - - - - - - - - - - - - - - - -
braille
-
- -
print
-
- -
electronic
-
- -
microfiche
-
- -
microfilm
-
-
- - -
- - - - - -
-
- -
- - - - - -
-
- -
- - - - - -
-
- -
- - - - - -
-
- -
- - - - - -
-
- -
- - - - - -
-
- -
- -
-
- - - - access - - - preservation - - - replacement - - - - - -
chip cartridge
-
- -
computer optical disc cartridge
-
- -
magnetic disc
-
- -
magneto-optical disc
-
- -
optical disc
-
- -
remote
-
- -
tape cartridge
-
- -
tape cassette
-
- -
tape reel
-
- - -
celestial globe
-
- -
earth moon globe
-
- -
planetary or lunar globe
-
- -
terrestrial globe
-
- - -
kit
-
- - -
atlas
-
- -
diagram
-
- -
map
-
- -
model
-
- -
profile
-
- -
remote-sensing image
-
- -
section
-
- -
view
-
- - -
aperture card
-
- -
microfiche
-
- -
microfiche cassette
-
- -
microfilm cartridge
-
- -
microfilm cassette
-
- -
microfilm reel
-
- -
microopaque
-
- - -
film cartridge
-
- -
film cassette
-
- -
film reel
-
- - -
chart
-
- -
collage
-
- -
drawing
-
- -
flash card
-
- -
painting
-
- -
photomechanical print
-
- -
photonegative
-
- -
photoprint
-
- -
picture
-
- -
print
-
- -
technical drawing
-
- - -
notated music
-
- - -
filmslip
-
- -
filmstrip cartridge
-
- -
filmstrip roll
-
- -
other filmstrip type
-
- -
slide
-
- -
transparency
-
- -
remote-sensing image
-
- -
cylinder
-
- -
roll
-
- -
sound cartridge
-
- -
sound cassette
-
- -
sound disc
-
- -
sound-tape reel
-
- -
sound-track film
-
- -
wire recording
-
- - -
braille
-
- -
combination
-
- -
moon
-
- -
tactile, with no writing system
-
- - -
braille
-
- -
large print
-
- -
regular print
-
- -
text in looseleaf binder
-
- - -
videocartridge
-
- -
videocassette
-
- -
videodisc
-
- -
videoreel
-
- - - - - - - - - - abce - - - -
- - - - - - - - - - ab - - - - - - - - agrt - - - - - - - ab - - - - - - - - - adolescent - - - adult - - - general - - - juvenile - - - preschool - - - specialized - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - defg - - - - - - - - - - - - marcgac - - - - - - iso3166 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ab - - - - - - - abx - - - - - - - ab - - - - - - - - - - - - - - - - - - - - - - - - - - - - ab - - - - - - - - - - av - - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - <xsl:value-of select="substring($titleChop,@ind2+1)"/> - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="subfieldSelect"> - <xsl:with-param name="codes">av</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - abcx3 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="specialSubfieldSelect"> - <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> - <xsl:with-param name="axis">t</xsl:with-param> - <xsl:with-param name="afterCodes">g</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - aq - t - g - - - - - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="specialSubfieldSelect"> - 
<xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> - <xsl:with-param name="axis">t</xsl:with-param> - <xsl:with-param name="afterCodes">dg</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - - - - - - - - c - t - dgn - - - - - - - - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="specialSubfieldSelect"> - <xsl:with-param name="anyCodes">tfklsv</xsl:with-param> - <xsl:with-param name="axis">t</xsl:with-param> - <xsl:with-param name="afterCodes">g</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - aqdc - t - gn - - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="subfieldSelect"> - <xsl:with-param name="codes">adfgklmorsv</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - - - - - - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - <xsl:value-of select="substring($titleChop,@ind1+1)"/> - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="specialSubfieldSelect"> - <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> - <xsl:with-param name="axis">t</xsl:with-param> - <xsl:with-param name="afterCodes">g</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - aq - t - g - - - - - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="specialSubfieldSelect"> - <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> - <xsl:with-param name="axis">t</xsl:with-param> - <xsl:with-param 
name="afterCodes">dg</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - - - - - - - - c - t - dgn - - - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:call-template name="specialSubfieldSelect"> - <xsl:with-param name="anyCodes">tfklsv</xsl:with-param> - <xsl:with-param name="axis">t</xsl:with-param> - <xsl:with-param name="afterCodes">g</xsl:with-param> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - - - - - - - aqdc - t - gn - - - - - - - - - - - - - - adfgklmorsv - - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - <xsl:value-of select="substring($titleChop,@ind2+1)"/> - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - - - - - - - - isbn - - - - - - - - - - isrc - - - - - - - - - - ismn - - - - - - - - - - sici - - - - ab - - - - - - issn - - - - - - - - lccn - - - - - - - - - - issue number - matrix number - music plate - music publisher - videorecording identifier - - - - - - - ba - ab - - - - - - - - - - ab - - - - - - - - doi - hdl - uri - - - - - - - - - - - - - - - - - y3z - - - - - - - - - - - - - - - - - - - - - y3 - - - - - - - z - - - - - - - - - - - - - - - - - - abje - - - - - - - - abcd35 - - - - - - - abcde35 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - n - n - fgkdlmor - - - - - p - p - fgkdlmor - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - g - g - pst - - - - - p - p - fgkdlmor - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
-
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cdn - - - - - - - - - - aq - - - - :,;/ - - - - - - - - - - acdeq - - - - - - constituent - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:value-of select="."></xsl:value-of> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:value-of select="."></xsl:value-of> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:value-of select="."></xsl:value-of> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - <xsl:call-template name="chopPunctuation"> - <xsl:with-param name="chopString"> - <xsl:value-of select="."></xsl:value-of> - </xsl:with-param> - </xsl:call-template> - - - - - - - - - - - - - - - code - marcgac - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - lcsh - lcshac - mesh - - nal - csh - rvm - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - aq - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cdnp - - - - - - - - - - - - - - - - abcdeqnp - - - - - - - - - - - - - - - - - - - - - adfhklor - - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - <xsl:value-of select="substring($titleChop,@ind1+1)"/> - - - - - - <xsl:value-of select="$titleChop" /> - - - - - - - - - - - - - - - - - abcd - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bc - - - - - - - - - - - - - - - - - - - - - - - - - - - yes - - - - - - - - - - - - - - - - - - - - - - - - - - - Arabic - Latin - Chinese, Japanese, Korean - Cyrillic - Hebrew - 
Greek - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - summary or subtitle - sung or spoken text - libretto - table of contents - accompanying material - translation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - summary or subtitle - sung or spoken text - libretto - table of contents - accompanying material - translation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .:,;/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
$$ WHERE name = 'mods32';

-- biblio.extract_metabib_field_entry(rid, default_joiner)
--
-- Produces the index rows for one bib record.
--
--   rid            : biblio.record_entry.id of the record to extract from.
--   default_joiner : string used to join text nodes when the field
--                    definition (config.metabib_field.joiner) is NULL.
--
-- Returns a set of metabib.field_entry_template rows.  For every
-- config.metabib_field definition this may emit:
--   * one browse row per matching node (browse_field = TRUE; search_field is
--     also TRUE when the definition is both search and browse),
--   * one facet row per matching node (facet_field = TRUE, and "field" is
--     the NEGATED definition id),
--   * one search row per definition holding the joined text of all matching
--     nodes (search_field = TRUE).
CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
DECLARE
    bib             biblio.record_entry%ROWTYPE;
    idx             config.metabib_field%ROWTYPE;
    xfrm            config.xml_transform%ROWTYPE;
    prev_xfrm       TEXT;
    transformed_xml TEXT;
    xml_node        TEXT;
    xml_node_list   TEXT[];
    facet_text      TEXT;
    browse_text     TEXT;
    sort_value      TEXT;
    raw_text        TEXT;
    curr_text       TEXT;
    joiner          TEXT := default_joiner; -- overridden per definition below
    authority_text  TEXT;
    authority_link  BIGINT;
    output_row      metabib.field_entry_template%ROWTYPE;
BEGIN

    -- Start out with no field-use bools set
    output_row.browse_field = FALSE;
    output_row.facet_field = FALSE;
    output_row.search_field = FALSE;

    -- Get the record
    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;

    -- Loop over the indexing entries.  Ordering by format keeps definitions
    -- that share a transform adjacent, so the XSLT result can be reused.
    FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP

        joiner := COALESCE(idx.joiner, default_joiner);

        SELECT INTO xfrm * FROM config.xml_transform WHERE name = idx.format;

        -- See if we can skip the XSLT ... it's expensive
        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
            -- Can't skip the transform.  An xslt of '---' means "use the
            -- MARCXML as-is".
            IF xfrm.xslt <> '---' THEN
                transformed_xml := oils_xslt_process(bib.marc, xfrm.xslt);
            ELSE
                transformed_xml := bib.marc;
            END IF;

            prev_xfrm := xfrm.name;
        END IF;

        xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );

        raw_text := NULL;
        FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
            -- Only element nodes are of interest; skip bare text results
            CONTINUE WHEN xml_node !~ E'^\\s*<';

            -- XXX much of this should be moved into oils_xpath_string...
            curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
                oils_xpath( '//text()',
                    REGEXP_REPLACE(         -- Collapses runs of whitespace to a single space
                        REGEXP_REPLACE(     -- This escapes all &s not followed by "amp;".  Data is returned from oils_xpath (above) in UTF-8, not entity encoded
                            REGEXP_REPLACE( -- This escapes embedded <s
                                xml_node,
                                $re$(>[^<]+)(<)([^>]+<)$re$,
                                E'\\1&lt;\\3',
                                'g'
                            ),
                            '&(?!amp;)',
                            '&amp;',
                            'g'
                        ),
                        E'\\s+',
                        ' ',
                        'g'
                    )
                ), ' '), ''),
                joiner
            );

            CONTINUE WHEN curr_text IS NULL OR curr_text = '';

            IF raw_text IS NOT NULL THEN
                raw_text := raw_text || joiner;
            END IF;

            raw_text := COALESCE(raw_text,'') || curr_text;

            -- autosuggest/metabib.browse_entry
            IF idx.browse_field THEN

                IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
                    browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
                ELSE
                    browse_text := curr_text;
                END IF;

                -- The sort key falls back to the display value when no
                -- dedicated sort xpath is configured
                IF idx.browse_sort_xpath IS NOT NULL AND
                    idx.browse_sort_xpath <> '' THEN

                    sort_value := oils_xpath_string(
                        idx.browse_sort_xpath, xml_node, joiner,
                        ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
                    );
                ELSE
                    sort_value := browse_text;
                END IF;

                output_row.field_class = idx.field_class;
                output_row.field = idx.id;
                output_row.source = rid;
                output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
                output_row.sort_value :=
                    public.naco_normalize(sort_value);

                output_row.authority := NULL;

                IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
                    authority_text := oils_xpath_string(
                        idx.authority_xpath, xml_node, joiner,
                        ARRAY[
                            ARRAY[xfrm.prefix, xfrm.namespace_uri],
                            ARRAY['xlink','http://www.w3.org/1999/xlink']
                        ]
                    );

                    -- Only link when the extracted text is purely numeric
                    -- and names an existing authority record.  E'' syntax
                    -- keeps \d intact regardless of
                    -- standard_conforming_strings.
                    IF authority_text ~ E'^\\d+$' THEN
                        authority_link := authority_text::BIGINT;
                        PERFORM * FROM authority.record_entry WHERE id = authority_link;
                        IF FOUND THEN
                            output_row.authority := authority_link;
                        END IF;
                    END IF;

                END IF;

                output_row.browse_field = TRUE;
                -- Returning browse rows with search_field = true for search+browse
                -- configs allows us to retain granularity of being able to search
                -- browse fields with "starts with" type operators (for example, for
                -- titles of songs in music albums)
                IF idx.search_field THEN
                    output_row.search_field = TRUE;
                END IF;
                RETURN NEXT output_row;
                output_row.browse_field = FALSE;
                output_row.search_field = FALSE;
                output_row.sort_value := NULL;
            END IF;

            -- insert raw node text for faceting
            IF idx.facet_field THEN

                IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
                    facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
                ELSE
                    facet_text := curr_text;
                END IF;

                output_row.field_class = idx.field_class;
                -- Facet rows carry the negated definition id
                output_row.field = -1 * idx.id;
                output_row.source = rid;
                output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));

                output_row.facet_field = TRUE;
                RETURN NEXT output_row;
                output_row.facet_field = FALSE;
            END IF;

        END LOOP;

        CONTINUE WHEN raw_text IS NULL OR raw_text = '';

        -- insert combined node text for searching
        IF idx.search_field THEN
            output_row.field_class = idx.field_class;
            output_row.field = idx.id;
            output_row.source = rid;
            output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));

            output_row.search_field = TRUE;
            RETURN NEXT output_row;
            output_row.search_field = FALSE;
        END IF;

    END LOOP;

END;

$func$ LANGUAGE PLPGSQL;


-- metabib.reingest_metabib_field_entries(bib_id, skip_facet, skip_browse, skip_search)
--
-- Rebuilds the facet, browse and search index rows for one bib record.
--
--   bib_id      : biblio.record_entry.id of the record to reingest.
--   skip_facet  : TRUE skips facet indexing; FALSE/NULL defers to the
--                 'ingest.skip_facet_indexing' internal flag.
--   skip_browse : TRUE skips browse indexing; FALSE/NULL defers to the
--                 'ingest.skip_browse_indexing' internal flag.
--   skip_search : TRUE skips search indexing; FALSE/NULL defers to the
--                 'ingest.skip_search_indexing' internal flag.
--
-- Unless the 'ingest.assume_inserts_only' internal flag is enabled, existing
-- rows for the record are deleted (per index type not being skipped) before
-- the new rows are inserted.
CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
DECLARE
    fclass          RECORD;
    ind_data        metabib.field_entry_template%ROWTYPE;
    mbe_id          BIGINT;
    dup_marker      INT;    -- non-NULL when an identical search row already exists
    b_skip_facet    BOOL;
    b_skip_browse   BOOL;
    b_skip_search   BOOL;
    value_prepped   TEXT;
BEGIN

    -- An explicit TRUE argument wins; otherwise consult the internal flag
    SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
    SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
    SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;

    PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
    IF NOT FOUND THEN
        IF NOT b_skip_search THEN
            FOR fclass IN SELECT * FROM config.metabib_class LOOP
                -- The table name is built from config.metabib_class.name;
                -- the record id is bound rather than spliced into the SQL.
                EXECUTE 'DELETE FROM metabib.' || fclass.name ||
                    '_field_entry WHERE source = $1'
                    USING bib_id;
            END LOOP;
        END IF;
        IF NOT b_skip_facet THEN
            DELETE FROM metabib.facet_entry WHERE source = bib_id;
        END IF;
        IF NOT b_skip_browse THEN
            DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
        END IF;
    END IF;

    FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
        -- Facet rows arrive with a negated field id; restore the real id
        IF ind_data.field < 0 THEN
            ind_data.field = -1 * ind_data.field;
        END IF;

        IF ind_data.facet_field AND NOT b_skip_facet THEN
            INSERT INTO metabib.facet_entry (field, source, value)
                VALUES (ind_data.field, ind_data.source, ind_data.value);
        END IF;

        IF ind_data.browse_field AND NOT b_skip_browse THEN
            -- A caveat about this SELECT: this should take care of replacing
            -- old mbe rows when data changes, but not if normalization (by
            -- which I mean specifically the output of
            -- evergreen.oils_tsearch2()) changes.  It may or may not be
            -- expensive to add a comparison of index_vector to index_vector
            -- to the WHERE clause below.

            value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);

            -- Reuse an existing browse entry with the same value and sort
            -- key, otherwise create one and capture its new id directly.
            SELECT id INTO mbe_id FROM metabib.browse_entry
                WHERE value = value_prepped AND sort_value = ind_data.sort_value;

            IF NOT FOUND THEN
                INSERT INTO metabib.browse_entry
                    ( value, sort_value ) VALUES
                    ( value_prepped, ind_data.sort_value )
                    RETURNING id INTO mbe_id;
            END IF;

            INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
                VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
        END IF;

        IF ind_data.search_field AND NOT b_skip_search THEN
            -- Avoid inserting duplicate rows.  EXECUTE ... INTO assigns NULL
            -- when the query returns no row.
            EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
                '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
                INTO dup_marker USING ind_data.field, ind_data.source, ind_data.value;

            IF dup_marker IS NULL THEN
                EXECUTE 'INSERT INTO metabib.' || ind_data.field_class ||
                    '_field_entry (field, source, value) VALUES ($1, $2, $3)'
                    USING ind_data.field, ind_data.source, ind_data.value;
            END IF;
        END IF;

    END LOOP;

    IF NOT b_skip_search THEN
        PERFORM metabib.update_combined_index_vectors(bib_id);
    END IF;

    RETURN;
END;
$func$ LANGUAGE PLPGSQL;

-- Don't use Title Proper search field as the browse field
UPDATE config.metabib_field
    SET browse_field = FALSE,
        browse_xpath = NULL,
        browse_sort_xpath = NULL
    WHERE id = 6;

-- Create a new Title Proper browse config
INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, search_field, authority_xpath, browse_field, browse_sort_xpath ) VALUES
    (31, 'title', 'browse', oils_i18n_gettext(31, 'Title Proper (Browse)', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleBrowse$$, FALSE, '//@xlink:href', TRUE, $$*[local-name() != "nonSort"]$$ );

COMMIT;

-- Post-commit, browse-only reingest of every bib record.  The arguments are
-- (id, skip_facet, skip_browse, skip_search): facet and search indexing are
-- skipped, browse indexing is not.
\qecho This is a browse-only reingest of your bib records. It may take a while.
\qecho You may cancel now without losing the effect of the rest of the
\qecho upgrade script, and arrange the reingest later.
\qecho .
SELECT
    metabib.reingest_metabib_field_entries(bre.id, TRUE, FALSE, TRUE)
FROM biblio.record_entry AS bre;