\qecho ''
\qecho '\\a'
\qecho '\\t'
-\qecho '\\o symspell-title.txt'
-\qecho 'select 'title:'||value from metabib.title_field_entry;'
-\qecho '\\o symspell-author.txt'
-\qecho 'select 'author:'||value from metabib.author_field_entry;'
-\qecho '\\o symspell-subject.txt'
-\qecho 'select 'subject:'||value from metabib.subject_field_entry;'
-\qecho '\\o symspell-series.txt'
-\qecho 'select 'series:'||value from metabib.series_field_entry;'
-\qecho '\\o symspell-identifier.txt'
-\qecho 'select 'identifier:'||value from metabib.identifier_field_entry;'
-\qecho '\\o symspell-keyword.txt'
-\qecho 'select 'keyword:'||value from metabib.keyword_field_entry;'
+\qecho ''
+\qecho '\\o title'
+\qecho 'select value from metabib.title_field_entry;'
+\qecho '\\o author'
+\qecho 'select value from metabib.author_field_entry;'
+\qecho '\\o subject'
+\qecho 'select value from metabib.subject_field_entry;'
+\qecho '\\o series'
+\qecho 'select value from metabib.series_field_entry;'
+\qecho '\\o identifier'
+\qecho 'select value from metabib.identifier_field_entry;'
+\qecho '\\o keyword'
+\qecho 'select value from metabib.keyword_field_entry;'
+\qecho ''
\qecho '\\o'
\qecho '\\a'
\qecho '\\t'
\qecho ''
-\qecho 'Then, at the command line:'
+\qecho '// Then, at the command line:'
\qecho ''
-\qecho '$ ~/Evergreen-tarball-path/Open-ILS/src/support-scripts/symspell-sideload.pl \'
-\qecho ' symspell-title.txt \'
-\qecho ' symspell-author.txt \'
-\qecho ' symspell-subject.txt \'
-\qecho ' symspell-series.txt \'
-\qecho ' symspell-identifier.txt \'
-\qecho ' symspell-keyword.txt > sideload.sql'
+\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql'
\qecho ''
-\qecho 'And, back in psql'
+\qecho '// And, back in psql'
\qecho ''
\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;'
\qecho 'TRUNCATE search.symspell_dictionary;'
\qecho ''
-\qecho '\\i sideload.sql'
+\qecho '\i identifier.sql'
+\qecho '\i author.sql'
+\qecho '\i title.sql'
+\qecho '\i subject.sql'
+\qecho '\i series.sql'
+\qecho '\i keyword.sql'
\qecho ''
\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;'
\qecho 'REINDEX TABLE search.symspell_dictionary;'
\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;'
\qecho 'VACUUM ANALYZE search.symspell_dictionary;'
\qecho ''
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_title;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_author;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_subject;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_series;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_identifier;'
+\qecho 'DROP TABLE search.search.symspell_dictionary_partial_keyword;'
/* To run by hand:
\a
\t
-\o symspell-title.txt
-select 'title:'||value from metabib.title_field_entry;
+\o title
+select value from metabib.title_field_entry;
-\o symspell-author.txt
-select 'author:'||value from metabib.author_field_entry;
+\o author
+select value from metabib.author_field_entry;
-\o symspell-subject.txt
-select 'subject:'||value from metabib.subject_field_entry;
+\o subject
+select value from metabib.subject_field_entry;
-\o symspell-series.txt
-select 'series:'||value from metabib.series_field_entry;
+\o series
+select value from metabib.series_field_entry;
-\o symspell-identifier.txt
-select 'identifier:'||value from metabib.identifier_field_entry;
+\o identifier
+select value from metabib.identifier_field_entry;
-\o symspell-keyword.txt
-select 'keyword:'||value from metabib.keyword_field_entry;
+\o keyword
+select value from metabib.keyword_field_entry;
\o
\a
// Then, at the command line:
-$ ~/Evergreen-tarball-path/Open-ILS/src/support-scripts/symspell-sideload.pl \
- symspell-title.txt \
- symspell-author.txt \
- symspell-subject.txt \
- symspell-series.txt \
- symspell-identifier.txt \
- symspell-keyword.txt > sideload.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql
+$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql
+
+// To the extent your hardware allows, the above commands can be run in
+// in parallel, in different shells. Each will use a full CPU, and RAM
+// may be a limiting resource, so keep an eye on that with `top`.
+
// And, back in psql
ALTER TABLE search.symspell_dictionary SET UNLOGGED;
TRUNCATE search.symspell_dictionary;
-\i sideload.sql
+\i identifier.sql
+\i author.sql
+\i title.sql
+\i subject.sql
+\i series.sql
+\i keyword.sql
CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;
REINDEX TABLE search.symspell_dictionary;
ALTER TABLE search.symspell_dictionary SET LOGGED;
VACUUM ANALYZE search.symspell_dictionary;
+DROP TABLE search.search.symspell_dictionary_partial_title;
+DROP TABLE search.search.symspell_dictionary_partial_author;
+DROP TABLE search.search.symspell_dictionary_partial_subject;
+DROP TABLE search.search.symspell_dictionary_partial_series;
+DROP TABLE search.search.symspell_dictionary_partial_identifier;
+DROP TABLE search.search.symspell_dictionary_partial_keyword;
+
*/
my $etime;
my $secs;
-my %classes = (
- title => 0,
- author => 1,
- subject => 2,
- series => 3,
- keyword => 4,
- identifier => 5
-);
+my $class = $ARGV[0];
my $stime = time;
-while (<>) {
+while (my $data = <>) {
my $line = $.;
- chomp(); $_=lc($_);
- my ($class,$data) = m/^(\w+):(.*$)/;
-
- my $ckey = $class.'_count';
- my $skey = $class.'_suggestions';
+ chomp($data); $data=lc($data);
my @words;
while( $data =~ m/([\w\d]+'*[\w\d]*)/g ) {
for my $raw (uniq @words) {
my $key = $raw;
- $dict{$key} //= { map {$_.'_count' => 0, $_.'_suggestions' => {}} keys %classes };
- $dict{$key}{$ckey}++;
- if ($dict{$key}{$ckey} == 1) { # first time we've seen it in this class, need to generate prefix keys
- $dict{$key}{$skey}{$raw} = 1;
+ $dict{$key} //= [0,[]];
+ $dict{$key}[0]++;
+
+ if ($dict{$key}[0] == 1) { # first time we've seen it, need to generate prefix keys
+ push @{$dict{$key}[1]}, $raw;
if (length($raw) > $plen) {
$key = substr($raw,0,$plen);
- $dict{$key} //= { map {$_.'_count' => 0, $_.'_suggestions' => {}} keys %classes };
- $dict{$key}{$skey}{$raw} = 1;
+ $dict{$key} //= [0,[]];
+ push @{$dict{$key}[1]}, $raw;
}
for my $edit (symspell_generate_edits($key, 1)) {
- $dict{$edit} //= { map {$_.'_count' => 0, $_.'_suggestions' => {}} keys %classes };
- $dict{$edit}{$skey}{$raw} = 1;
+ $dict{$edit} //= [0,[]];
+ push @{$dict{$edit}[1]}, $raw;
}
}
}
$stime = time;
my $counter = 0;
-my @keymap = (map { [$_.'_count', $_.'_suggestions'] } sort keys %classes);
+print <<"SQL";
+CREATE UNLOGGED TABLE search.symspell_dictionary_partial_$class (
+ prefix_key TEXT,
+ ${class}_count INT,
+ ${class}_suggestions TEXT[]
+) FROM STDIN;
-print 'COPY search.symspell_dictionary (prefix_key, '. join(', ', map { ($$_[0], $$_[1]) } @keymap) . ") FROM STDIN;\n";
+COPY search.search.symspell_dictionary_partial_$class FROM STDIN;
+SQL
while ( my ($key, $cl_dict) = each %dict ) {
$counter++;
- print join( "\t", $key, map {
- ($$cl_dict{$$_[0]}, (keys %{$$cl_dict{$$_[1]}} ? '{'.join(',', uniq(keys %{$$cl_dict{$$_[1]}})).'}' : '\N'))
- } @keymap) . "\n";
-
+ print join( "\t", $key, $$cl_dict[0], (scalar(@{$$cl_dict[1]}) ? '{'.join(',', uniq @{$$cl_dict[1]}).'}' : '\N')) . "\n";
delete $dict{$key};
}
-print '\.'."\n\n";
+print <<"SQL";
+\\.
+
+INSERT INTO search.symspell_dictionary (prefix_key, ${class}_count, ${class}_suggestions)
+ SELECT * FROM search.search.symspell_dictionary_partial_$class
+ ON CONFILICT (prefix_key) DO UPDATE
+ SET ${class}_count = EXCLUDED.${class}_count,
+ ${class}_suggestions = EXCLUDED.${class}_suggestions;
+SQL
$etime = time;
$secs = $etime - $stime;