From 3a146c72b8595952afb8abbfb1fcd03345605913 Mon Sep 17 00:00:00 2001 From: dbs Date: Mon, 13 Sep 2010 14:27:59 +0000 Subject: [PATCH] Add stock versions of O:A:Ingest and O:A:Storage:Driver:Pg:fts Prepping for custom indexing patches git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/branches/rel_1_6_1@986 6d9bc8c9-1ec2-4278-b937-99fde70a366f --- src/perlmods/OpenILS/Application/Ingest.pm | 1453 ++++++++++++++++++++ .../OpenILS/Application/Storage/Driver/Pg/fts.pm | 89 ++ 2 files changed, 1542 insertions(+) create mode 100644 src/perlmods/OpenILS/Application/Ingest.pm create mode 100644 src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm diff --git a/src/perlmods/OpenILS/Application/Ingest.pm b/src/perlmods/OpenILS/Application/Ingest.pm new file mode 100644 index 0000000000..30892b517b --- /dev/null +++ b/src/perlmods/OpenILS/Application/Ingest.pm @@ -0,0 +1,1453 @@ +package OpenILS::Application::Ingest; +use OpenILS::Application; +use base qw/OpenILS::Application/; + +use Unicode::Normalize; +use OpenSRF::EX qw/:try/; + +use OpenSRF::AppSession; +use OpenSRF::Utils::SettingsClient; +use OpenSRF::Utils::Logger qw/:level/; + +use OpenILS::Application::AppUtils; +use OpenILS::Utils::ScriptRunner; +use OpenILS::Utils::Fieldmapper; +use OpenSRF::Utils::JSON; + +use OpenILS::Utils::Fieldmapper; + +use XML::LibXML; +use XML::LibXSLT; +use Time::HiRes qw(time); + +our %supported_formats = ( + mods33 => {ns => 'http://www.loc.gov/mods/v3'}, + mods32 => {ns => 'http://www.loc.gov/mods/v3'}, + mods3 => {ns => 'http://www.loc.gov/mods/v3'}, + mods => {ns => 'http://www.loc.gov/mods/'}, + marcxml => {ns => 'http://www.loc.gov/MARC21/slim'}, + srw_dc => {ns => 'info:srw/schema/1/dc-schema'}, + oai_dc => {ns => 'http://www.openarchives.org/OAI/2.0/oai_dc/'}, + rdf_dc => {ns => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}, + atom => {ns => 'http://www.w3.org/2005/Atom'}, + rss091 => {ns => 'http://my.netscape.com/rdf/simple/0.9/'}, + rss092 => {ns => ''}, + rss093 => {ns => ''}, + rss094 => {ns => ''}, + rss10 => {ns => 'http://purl.org/rss/1.0/'}, + rss11 => {ns => 'http://purl.org/net/rss1.1#'}, + rss2 => {ns => ''}, +); + + +my $log = 'OpenSRF::Utils::Logger'; + +my $parser = XML::LibXML->new(); +my $xslt = XML::LibXSLT->new(); + +my $mods_sheet; +my $mads_sheet; +my $xpathset = {}; +sub initialize {} +sub child_init {} + +sub post_init { + + unless (keys %$xpathset) { + $log->debug("Running post_init", DEBUG); + + my $xsldir = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl'); + + unless ($supported_formats{mods}{xslt}) { + $log->debug("Loading MODS XSLT", DEBUG); + my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS.xsl"); + $supported_formats{mods}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); + } + + unless ($supported_formats{mods3}{xslt}) { + $log->debug("Loading MODS v3 XSLT", DEBUG); + my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS3.xsl"); + $supported_formats{mods3}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); + } + + unless ($supported_formats{mods32}{xslt}) { + $log->debug("Loading MODS v32 XSLT", DEBUG); + my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS32.xsl"); + $supported_formats{mods32}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); + } + + unless ($supported_formats{mods33}{xslt}) { + $log->debug("Loading MODS v33 XSLT", DEBUG); + my $xslt_doc = $parser->parse_file( $xsldir . "/MARC21slim2MODS33.xsl"); + $supported_formats{mods33}{xslt} = $xslt->parse_stylesheet( $xslt_doc ); + } + + my $req = OpenSRF::AppSession + ->create('open-ils.cstore') + + # XXX testing new metabib field use for faceting + #->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { id => { '!=' => undef } } ) + ->request( 'open-ils.cstore.direct.config.metabib_field.search.atomic', { search_field => 't' } ) + + ->gather(1); + + if (ref $req and @$req) { + for my $f (@$req) { + $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath; + $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id; + $xpathset->{ $f->field_class }->{ $f->name }->{format} = $f->format; + $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG); + } + } + } +} + +# -------------------------------------------------------------------------------- +# Biblio ingest + +package OpenILS::Application::Ingest::Biblio; +use base qw/OpenILS::Application::Ingest/; +use Unicode::Normalize; + +sub rw_biblio_ingest_single_object { + my $self = shift; + my $client = shift; + my $bib = shift; + + my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib); + return undef unless ($blob); + + $bib->fingerprint( $blob->{fingerprint}->{fingerprint} ); + $bib->quality( $blob->{fingerprint}->{quality} ); + + my $cstore = OpenSRF::AppSession->connect('open-ils.cstore'); + + my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1); + my $tmp; + + # update uri stuff ... + + # gather URI call numbers for this record + my $uri_cns = $u->{call_number} = $cstore->request( + 'open-ils.cstore.direct.asset.call_number.id_list.atomic' => { record => $bib->id, label => '##URI##' } + )->gather(1); + + if (@$uri_cns) { + # gather the maps for those call numbers + my $uri_maps = $u->{call_number} = $cstore->request( + 'open-ils.cstore.direct.asset.uri_call_number_map.id_list.atomic' => { call_number => $uri_cns } + )->gather(1); + + # delete the old maps + $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.delete' => $_ )->gather(1) for (@$uri_maps); + + # and delete the call numbers if there are no more URIs + if (!@{ $blob->{uri} }) { + $cstore->request( 'open-ils.cstore.direct.asset.call_number.delete' => $_ )->gather(1) for (@$uri_cns); + } + } + + # now, add CNs, URIs and maps + my %new_cns_by_owner; + my %new_uris_by_owner; + for my $u ( @{ $blob->{uri} } ) { + + my $owner = $u->{call_number}->owning_lib; + + if ($u->{call_number}->isnew) { + if ($new_cns_by_owner{$owner}) { + $u->{call_number} = $new_cns_by_owner{$owner}; + } else { + $u->{call_number}->clear_id; + $u->{call_number} = $new_cns_by_owner{$owner} = $cstore->request( + 'open-ils.cstore.direct.asset.call_number.create' => $u->{call_number} + )->gather(1); + } + } + + if ($u->{uri}->isnew) { + if ($new_uris_by_owner{$owner}) { + $u->{uri} = $new_uris_by_owner{$owner}; + } else { + $u->{uri} = $new_uris_by_owner{$owner} = $cstore->request( + 'open-ils.cstore.direct.asset.uri.create' => $u->{uri} + )->gather(1); + } + } + + # Check for an existing CN-URI map + $tmp = $cstore->request( + 'open-ils.cstore.direct.asset.uri_call_number_map.id_list', + { call_number => $u->{call_number}->id, uri => $u->{uri}->id } + )->gather(1); + + next if ($tmp); + + my $umap = Fieldmapper::asset::uri_call_number_map->new; + $umap->uri($u->{uri}->id); + $umap->call_number($u->{call_number}->id); + + $cstore->request( 'open-ils.cstore.direct.asset.uri_call_number_map.create' => $umap )->gather(1); + } + + # update full_rec stuff ... + $tmp = $cstore->request( + 'open-ils.cstore.direct.metabib.full_rec.id_list.atomic', + { record => $bib->id } + )->gather(1); + + $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp); + $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} }); + + # update rec_descriptor stuff ... + $tmp = $cstore->request( + 'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic', + { record => $bib->id } + )->gather(1); + + $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp); + $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1); + + # deal with classed fields... + for my $class ( qw/title author subject keyword series/ ) { + $tmp = $cstore->request( + "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic", + { source => $bib->id } + )->gather(1); + + $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp); + } + for my $obj ( @{ $blob->{field_entries} } ) { + my $class = $obj->class_name; + $class =~ s/^Fieldmapper:://o; + $class =~ s/::/./go; + $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1); + } + + # update MR map ... + + $tmp = $cstore->request( + 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic', + { source => $bib->id } + )->gather(1); + + $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp); + + # get the old MRs + my $old_mrs = $cstore->request( + 'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] } + )->gather(1) if (@$tmp); + + $old_mrs = [] if (!ref($old_mrs)); + + my $mr; + for my $m (@$old_mrs) { + if ($m->fingerprint eq $bib->fingerprint) { + $mr = $m; + } else { + my $others = $cstore->request( + 'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id } + )->gather(1); + + if (!@$others) { + $cstore->request( + 'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id + )->gather(1); + } + + $m->isdeleted(1); + } + } + + my $holds; + if (!$mr) { + # Get the matchin MR, if any. + $mr = $cstore->request( + 'open-ils.cstore.direct.metabib.metarecord.search', + { fingerprint => $bib->fingerprint } + )->gather(1); + + $holds = $cstore->request( + 'open-ils.cstore.direct.action.hold_request.search.atomic', + { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] } + )->gather(1) if (@$old_mrs); + + if ($mr) { + for my $h (@$holds) { + $h->target($mr); + $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1); + $h->ischanged(1); + } + } + } + + if (!$mr) { + $mr = new Fieldmapper::metabib::metarecord; + $mr->fingerprint( $bib->fingerprint ); + $mr->master_record( $bib->id ); + $mr->id( + $cstore->request( + "open-ils.cstore.direct.metabib.metarecord.create", + $mr => { quiet => 'true' } + )->gather(1) + ); + + for my $h (grep { !$_->ischanged } @$holds) { + $h->target($mr); + $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1); + } + } else { + my $mrm = $cstore->request( + 'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic', + { metarecord => $mr->id } + )->gather(1); + + if (@$mrm) { + my $best = $cstore->request( + "open-ils.cstore.direct.biblio.record_entry.search", + { id => [ map { $_->source } @$mrm ] }, + { 'select' => { bre => [ qw/id quality/ ] }, + order_by => { bre => "quality desc" }, + limit => 1, + } + )->gather(1); + + if ($best->quality > $bib->quality) { + $mr->master_record($best->id); + } else { + $mr->master_record($bib->id); + } + } else { + $mr->master_record($bib->id); + } + + $mr->clear_mods; + + $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1); + } + + my $mrm = new Fieldmapper::metabib::metarecord_source_map; + $mrm->source($bib->id); + $mrm->metarecord($mr->id); + + $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1); + $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1); + + $cstore->request( 'open-ils.cstore.json_query.atomic' => { from => [ 'reporter.simple_rec_update', $bib->id ] } )->gather(1); + + $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;; + $cstore->disconnect; + + return $bib->id; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.object", + method => "rw_biblio_ingest_single_object", + api_level => 1, + argc => 1, +); + +sub rw_biblio_ingest_single_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + OpenILS::Application::Ingest->post_init(); + my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); + $cstore->request('open-ils.cstore.transaction.begin')->gather(1); + + my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )->gather(1); + + $cstore->request('open-ils.cstore.transaction.rollback')->gather(1); + $cstore->disconnect; + + return undef unless ($r and @$r); + + return ($self->method_lookup("open-ils.ingest.full.biblio.object")->run($r))[0]; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.record", + method => "rw_biblio_ingest_single_record", + api_level => 1, + argc => 1, +); + +sub rw_biblio_ingest_record_list { + my $self = shift; + my $client = shift; + my @rec = ref($_[0]) ? @{ $_[0] } : @_ ; + + OpenILS::Application::Ingest->post_init(); + my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); + $cstore->request('open-ils.cstore.transaction.begin')->gather(1); + + my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1); + + $cstore->request('open-ils.cstore.transaction.rollback')->gather(1); + $cstore->disconnect; + + return undef unless ($r and @$r); + + my $count = 0; + for (@$r) { + if (($self->method_lookup("open-ils.ingest.full.biblio.object")->run($_))[0]) { + $count++ + } + } + return $count; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.record_list", + method => "rw_biblio_ingest_record_list", + api_level => 1, + argc => 1, +); + +sub ro_biblio_ingest_single_object { + my $self = shift; + my $client = shift; + my $bib = shift; + my $xml = OpenILS::Application::AppUtils->entityize($bib->marc); + my $max_cn = shift; + my $max_uri = shift; + + my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); + + if (!$max_cn) { + my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1); + $max_cn = int($cn->id) + 1000; + } + + if (!$max_uri) { + my $cn = $cstore->request( 'open-ils.cstore.direct.asset.call_number.search' => { id => { '!=' => undef } }, { limit => 1, order_by => { acn => 'id desc' } } )->gather(1); + $max_uri = int($cn->id) + 1000; + } + + $cstore->disconnect; + + my $document = $parser->parse_string($xml); + + my @uris = $self->method_lookup("open-ils.ingest.856_uri.object")->run($bib, $max_cn, $max_uri); + my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document); + my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document); + my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml); + my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml); + + $_->source($bib->id) for (@mXfe); + $_->record($bib->id) for (@mfr); + $rd->record($bib->id) if ($rd); + + return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd, uri => \@uris }; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.object.readonly", + method => "ro_biblio_ingest_single_object", + api_level => 1, + argc => 1, +); + +sub ro_biblio_ingest_single_xml { + my $self = shift; + my $client = shift; + my $xml = OpenILS::Application::AppUtils->entityize(shift); + + my $document = $parser->parse_string($xml); + + my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document); + my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document); + my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml); + my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml); + + return { full_rec => \@mfr, field_entries => \@mXfe, fingerprint => $fp, descriptor => $rd }; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.xml.readonly", + method => "ro_biblio_ingest_single_xml", + api_level => 1, + argc => 1, +); + +sub ro_biblio_ingest_single_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + OpenILS::Application::Ingest->post_init(); + my $r = OpenSRF::AppSession + ->create('open-ils.cstore') + ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec ) + ->gather(1); + + return undef unless ($r and @$r); + + my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc); + + $_->source($rec) for (@{$res->{field_entries}}); + $_->record($rec) for (@{$res->{full_rec}}); + $res->{descriptor}->record($rec); + + return $res; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.record.readonly", + method => "ro_biblio_ingest_single_record", + api_level => 1, + argc => 1, +); + +sub ro_biblio_ingest_stream_record { + my $self = shift; + my $client = shift; + + OpenILS::Application::Ingest->post_init(); + + my $ses = OpenSRF::AppSession->create('open-ils.cstore'); + + while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { + + my $rec = $resp->content; + last unless (defined $rec); + + $log->debug("Running open-ils.ingest.full.biblio.record.readonly ..."); + my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly")->run($rec); + + $_->source($rec) for (@{$res->{field_entries}}); + $_->record($rec) for (@{$res->{full_rec}}); + + $client->respond( $res ); + } + + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.record_stream.readonly", + method => "ro_biblio_ingest_stream_record", + api_level => 1, + stream => 1, +); + +sub ro_biblio_ingest_stream_xml { + my $self = shift; + my $client = shift; + + OpenILS::Application::Ingest->post_init(); + + my $ses = OpenSRF::AppSession->create('open-ils.cstore'); + + while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { + + my $xml = $resp->content; + last unless (defined $xml); + + $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ..."); + my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($xml); + + $client->respond( $res ); + } + + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.xml_stream.readonly", + method => "ro_biblio_ingest_stream_xml", + api_level => 1, + stream => 1, +); + +sub rw_biblio_ingest_stream_import { + my $self = shift; + my $client = shift; + + OpenILS::Application::Ingest->post_init(); + + my $ses = OpenSRF::AppSession->create('open-ils.cstore'); + + while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { + + my $bib = $resp->content; + last unless (defined $bib); + + $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ..."); + my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($bib->marc); + + $_->source($bib->id) for (@{$res->{field_entries}}); + $_->record($bib->id) for (@{$res->{full_rec}}); + + $client->respond( $res ); + } + + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.biblio.bib_stream.import", + method => "rw_biblio_ingest_stream_import", + api_level => 1, + stream => 1, +); + + +# -------------------------------------------------------------------------------- +# Authority ingest + +package OpenILS::Application::Ingest::Authority; +use base qw/OpenILS::Application::Ingest/; +use Unicode::Normalize; + +sub rw_authority_ingest_single_object { + my $self = shift; + my $client = shift; + my $auth = shift; + + my ($blob) = $self->method_lookup("open-ils.ingest.full.authority.object.readonly")->run($auth); + return undef unless ($blob); + + my $cstore = OpenSRF::AppSession->connect('open-ils.cstore'); + + my $xact = $cstore->request('open-ils.cstore.transaction.begin')->gather(1); + my $tmp; + + # update full_rec stuff ... + $tmp = $cstore->request( + 'open-ils.cstore.direct.authority.full_rec.id_list.atomic', + { record => $auth->id } + )->gather(1); + + $cstore->request( 'open-ils.cstore.direct.authority.full_rec.delete' => $_ )->gather(1) for (@$tmp); + $cstore->request( 'open-ils.cstore.direct.authority.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} }); + + # XXX when we start extracting authority descriptors and adding sources ... + # + # update rec_descriptor stuff ... + #$tmp = $cstore->request( + # 'open-ils.cstore.direct.authority.record_descriptor.id_list.atomic', + # { record => $auth->id } + #)->gather(1); + # + #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.delete' => $_ )->gather(1) for (@$tmp); + #$cstore->request( 'open-ils.cstore.direct.authority.record_descriptor.create' => $blob->{descriptor} )->gather(1); + #$cstore->request( 'open-ils.cstore.direct.authority.record_entry.update' => $auth )->gather(1); + + $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1) || return undef;; + $cstore->disconnect; + + return $auth->id; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.object", + method => "rw_authority_ingest_single_object", + api_level => 1, + argc => 1, +); + +sub rw_authority_ingest_single_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + OpenILS::Application::Ingest->post_init(); + my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' ); + $cstore->request('open-ils.cstore.transaction.begin')->gather(1); + + my $r = $cstore->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )->gather(1); + + $cstore->request('open-ils.cstore.transaction.rollback')->gather(1); + $cstore->disconnect; + + return undef unless ($r and @$r); + + return ($self->method_lookup("open-ils.ingest.full.authority.object")->run($r))[0]; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.record", + method => "rw_authority_ingest_single_record", + api_level => 1, + argc => 1, +); + +sub ro_authority_ingest_single_object { + my $self = shift; + my $client = shift; + my $bib = shift; + my $xml = OpenILS::Application::AppUtils->entityize($bib->marc); + + my $document = $parser->parse_string($xml); + + my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document); + + $_->record($bib->id) for (@mfr); + + return { full_rec => \@mfr }; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.object.readonly", + method => "ro_authority_ingest_single_object", + api_level => 1, + argc => 1, +); + +sub ro_authority_ingest_single_xml { + my $self = shift; + my $client = shift; + my $xml = OpenILS::Application::AppUtils->entityize(shift); + + my $document = $parser->parse_string($xml); + + my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document); + + return { full_rec => \@mfr }; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.xml.readonly", + method => "ro_authority_ingest_single_xml", + api_level => 1, + argc => 1, +); + +sub ro_authority_ingest_single_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + OpenILS::Application::Ingest->post_init(); + my $r = OpenSRF::AppSession + ->create('open-ils.cstore') + ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec ) + ->gather(1); + + return undef unless ($r and @$r); + + my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc); + + $_->record($rec) for (@{$res->{full_rec}}); + $res->{descriptor}->record($rec); + + return $res; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.record.readonly", + method => "ro_authority_ingest_single_record", + api_level => 1, + argc => 1, +); + +sub ro_authority_ingest_stream_record { + my $self = shift; + my $client = shift; + + OpenILS::Application::Ingest->post_init(); + + my $ses = OpenSRF::AppSession->create('open-ils.cstore'); + + while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { + + my $rec = $resp->content; + last unless (defined $rec); + + $log->debug("Running open-ils.ingest.full.authority.record.readonly ..."); + my ($res) = $self->method_lookup("open-ils.ingest.full.authority.record.readonly")->run($rec); + + $_->record($rec) for (@{$res->{full_rec}}); + + $client->respond( $res ); + } + + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.record_stream.readonly", + method => "ro_authority_ingest_stream_record", + api_level => 1, + stream => 1, +); + +sub ro_authority_ingest_stream_xml { + my $self = shift; + my $client = shift; + + OpenILS::Application::Ingest->post_init(); + + my $ses = OpenSRF::AppSession->create('open-ils.cstore'); + + while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { + + my $xml = $resp->content; + last unless (defined $xml); + + $log->debug("Running open-ils.ingest.full.authority.xml.readonly ..."); + my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($xml); + + $client->respond( $res ); + } + + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.xml_stream.readonly", + method => "ro_authority_ingest_stream_xml", + api_level => 1, + stream => 1, +); + +sub rw_authority_ingest_stream_import { + my $self = shift; + my $client = shift; + + OpenILS::Application::Ingest->post_init(); + + my $ses = OpenSRF::AppSession->create('open-ils.cstore'); + + while (my ($resp) = $client->recv( count => 1, timeout => 5 )) { + + my $bib = $resp->content; + last unless (defined $bib); + + $log->debug("Running open-ils.ingest.full.authority.xml.readonly ..."); + my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($bib->marc); + + $_->record($bib->id) for (@{$res->{full_rec}}); + + $client->respond( $res ); + } + + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.full.authority.bib_stream.import", + method => "rw_authority_ingest_stream_import", + api_level => 1, + stream => 1, +); + +# -------------------------------------------------------------------------------- +# MARC index extraction + +package OpenILS::Application::Ingest::XPATH; +use base qw/OpenILS::Application::Ingest/; +use Unicode::Normalize; + +# give this an XML documentElement and an XPATH expression +sub xpath_to_string { + my $xml = shift; + my $xpath = shift; + my $ns_uri = shift; + my $ns_prefix = shift; + my $unique = shift; + + $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix); + + my $string = ""; + + # grab the set of matching nodes + my @nodes = $xml->findnodes( $xpath ); + for my $value (@nodes) { + + # grab all children of the node + my @children = $value->childNodes(); + for my $child (@children) { + + # add the childs content to the growing buffer + my $content = quotemeta($child->textContent); + next if ($unique && $string =~ /$content/); # uniquify the values + $string .= $child->textContent . " "; + } + if( ! @children ) { + $string .= $value->textContent . " "; + } + } + + $string =~ s/(\w+)\/(\w+)/$1 $2/sgo; + # Split date ranges and ISSNs on the hyphen + $string =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi; + + return NFD($string); +} + +sub class_index_string_xml { + my $self = shift; + my $client = shift; + my $xml = shift; + my @classes = @_; + + OpenILS::Application::Ingest->post_init(); + $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml); + + my %transform_cache; + + for my $class (@classes) { + my $class_constructor = "Fieldmapper::metabib::${class}_field_entry"; + for my $type ( keys %{ $xpathset->{$class} } ) { + + my $def = $xpathset->{$class}->{$type}; + my $sf = $OpenILS::Application::Ingest::supported_formats{$def->{format}}; + + my $document = $xml; + + if ($sf->{xslt}) { + $document = $transform_cache{$def->{format}} || $sf->{xslt}->transform($xml); + $transform_cache{$def->{format}} = $document; + } + + my $value = xpath_to_string( + $document->documentElement => $def->{xpath}, + $sf->{ns} => $def->{format}, + 1 + ); + + next unless $value; + + $value = NFD($value); + $value =~ s/\pM+//sgo; + $value =~ s/\pC+//sgo; + $value =~ s/\W+$//sgo; + + # hack to normalize ratio-like strings + while ($term =~ /\b\d{1}:[, ]?\d+(?:[ ,]\d+[^:])+/o) { + $term = $` . join ('', split(/[, ]/, $&)) . $'; + } + + $value =~ s/\b\.+\b//sgo; + $value = lc($value); + + my $fm = $class_constructor->new; + $fm->value( $value ); + $fm->field( $xpathset->{$class}->{$type}->{id} ); + $client->respond($fm); + } + } + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.field_entry.class.xml", + method => "class_index_string_xml", + api_level => 1, + argc => 2, + stream => 1, +); + +sub class_index_string_record { + my $self = shift; + my $client = shift; + my $rec = shift; + my @classes = shift; + + OpenILS::Application::Ingest->post_init(); + my $r = OpenSRF::AppSession + ->create('open-ils.cstore') + ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec ) + ->gather(1); + + return undef unless ($r and @$r); + + for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) { + $fm->source($rec); + $client->respond($fm); + } + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.field_entry.class.record", + method => "class_index_string_record", + api_level => 1, + argc => 2, + stream => 1, +); + +sub all_index_string_xml { + my $self = shift; + my $client = shift; + my $xml = shift; + + for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($xml, keys(%$xpathset))) { + $client->respond($fm); + } + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.extract.field_entry.all.xml", + method => "all_index_string_xml", + api_level => 1, + argc => 1, + stream => 1, +); + +sub all_index_string_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + OpenILS::Application::Ingest->post_init(); + my $r = OpenSRF::AppSession + ->create('open-ils.cstore') + ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec ) + ->gather(1); + + return undef unless ($r and @$r); + + for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, keys(%$xpathset))) { + $fm->source($rec); + $client->respond($fm); + } + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.extract.field_entry.all.record", + method => "all_index_string_record", + api_level => 1, + argc => 1, + stream => 1, +); + +# -------------------------------------------------------------------------------- +# Flat MARC + +package OpenILS::Application::Ingest::FlatMARC; +use base qw/OpenILS::Application::Ingest/; +use Unicode::Normalize; + + +sub _marcxml_to_full_rows { + + my $marcxml = shift; + my $xmltype = shift || 'metabib'; + + my $type = "Fieldmapper::${xmltype}::full_rec"; + + my @ns_list; + + my ($root) = $marcxml->findnodes('//*[local-name()="record"]'); + + for my $tagline ( @{$root->getChildrenByTagName("leader")} ) { + next unless $tagline; + _special_tag_to_full_rows($type, $tagline, \@ns_list, 'LDR'); + } + + for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) { + next unless $tagline; + _special_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" )); + } + + for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) { + next unless $tagline; + _data_tag_to_full_rows($type, $tagline, \@ns_list, $tagline->getAttribute( "tag" )); + + if ($xmltype eq 'metabib' and $tag eq '245') { + _data_tag_to_full_rows($type, $tagline, \@ns_list, 'tnf'); + } + } + + $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml"); + return @ns_list; +} + +=head2 _special_tag_to_full_rows + +Converts a leader or control field to a set of normalized values + +=cut + +sub _special_tag_to_full_rows { + my $type = shift; + my $tagline = shift; + my $ns_list = shift; + my $tagname = shift; + + my $ns = $type->new; + + $ns->tag( $tagname ); + my $val = $tagline->textContent; + $val = NFD($val); + $val =~ s/\pM+//sgo; + $val =~ s/\pC+//sgo; + $val =~ s/\W+$//sgo; + $ns->value( $val ); + + push @$ns_list, $ns; +} + +=head2 _data_tag_to_full_rows + +Converts a data field to a set of normalized values + +=cut + +sub _data_tag_to_full_rows { + my $type = shift; + my $tagline = shift; + my $ns_list = shift; + my $tag = shift; + + my $ind1 = $tagline->getAttribute( "ind1" ); + my $ind2 = $tagline->getAttribute( "ind2" ); + + for my $data ( @{$tagline->getChildrenByTagName('subfield')} ) { + next unless $data; + + my $ns = $type->new; + + $ns->tag( $tag ); + $ns->ind1( $ind1 ); + $ns->ind2( $ind2 ); + $ns->subfield( $data->getAttribute( "code" ) ); + my $val = $data->textContent; + $val = NFD($val); + $val =~ s/\pM+//sgo; + $val =~ s/\pC+//sgo; + $val =~ s/\W+$//sgo; + # Split date ranges and ISSNs on the hyphen + $val =~ s/(\d{4})-(\d{3,4}x?)/ $1 $2 /goi; + $val =~ s/(\w+)\/(\w+)/$1 $2/sgo; + $ns->value( lc($val) ); + + push @$ns_list, $ns; + } +} + +sub flat_marc_xml { + my $self = shift; + my $client = shift; + my $xml = shift; + + $log->debug("processing [$xml]"); + + $xml = $parser->parse_string(OpenILS::Application::AppUtils->entityize($xml)) unless (ref $xml); + + my $type = 'metabib'; + $type = 'authority' if ($self->api_name =~ /authority/o); + + OpenILS::Application::Ingest->post_init(); + + $client->respond($_) for (_marcxml_to_full_rows($xml, $type)); + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.flat_marc.authority.xml", + method => "flat_marc_xml", + api_level => 1, + argc => 1, + stream => 1, +); +__PACKAGE__->register_method( + api_name => "open-ils.ingest.flat_marc.biblio.xml", + method => "flat_marc_xml", + api_level => 1, + argc => 1, + stream => 1, +); + +sub flat_marc_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + my $type = 'biblio'; + $type = 'authority' if ($self->api_name =~ /authority/o); + + OpenILS::Application::Ingest->post_init(); + my $r = OpenSRF::AppSession + ->create('open-ils.cstore') + ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec ) + ->gather(1); + + + return undef unless ($r and $r->marc); + + my @rows = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml")->run($r->marc); + for my $row (@rows) { + $client->respond($row); + $log->debug(OpenSRF::Utils::JSON->perl2JSON($row), DEBUG); + } + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.flat_marc.biblio.record_entry", + method => "flat_marc_record", + api_level => 1, + argc => 1, + stream => 1, +); +__PACKAGE__->register_method( + api_name => "open-ils.ingest.flat_marc.authority.record_entry", + method => "flat_marc_record", + api_level => 1, + argc => 1, + stream => 1, +); + +# -------------------------------------------------------------------------------- +# URI extraction + +package OpenILS::Application::Ingest::Biblio::URI; +use base qw/OpenILS::Application::Ingest/; +use Unicode::Normalize; +use OpenSRF::EX qw/:try/; + + +sub _extract_856_uris { + + my $rec = shift; + my $max_cn = shift; + my $max_uri = shift; + my @objects; + + my $recid = $rec->id; + my $marcxml = $rec->marc; + + my $document = $parser->parse_string($marcxml); + my @nodes = $document->findnodes('//*[local-name()="datafield" and @tag="856" and (@ind1="4" or @ind1="1") and (@ind2="0" or @ind2="1")]'); + + my $cstore = OpenSRF::AppSession->connect('open-ils.cstore'); + + my %cn_cache; + + for my $node (@nodes) { + # first, is there a URI? + my $href = $node->findvalue('*[local-name()="subfield" and @code="u"]/text()'); + next unless ($href); + + # now, find the best possible label + my $label = $node->findvalue('*[local-name()="subfield" and @code="y"]/text()'); + $label ||= $node->findvalue('*[local-name()="subfield" and @code="3"]/text()'); + $label ||= $href; + + # look for use info + my $use = $node->findvalue('*[local-name()="subfield" and @code="z"]/text()'); + $use ||= $node->findvalue('*[local-name()="subfield" and @code="2"]/text()'); + $use ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()'); + + # moving on to the URI owner + my $owner = $node->findvalue('*[local-name()="subfield" and @code="9"]/text()'); # Evergreen special sauce + $owner ||= $node->findvalue('*[local-name()="subfield" and @code="w"]/text()'); + $owner ||= $node->findvalue('*[local-name()="subfield" and @code="n"]/text()'); + + $owner =~ s/^.*?\((\w+)\).*$/$1/o; # unwrap first paren-enclosed string and then ... + + # no owner? skip it :( + next unless ($owner); + + my $org = $cstore + ->request( 'open-ils.cstore.direct.actor.org_unit.search' => { shortname => $owner} ) + ->gather(1); + + next unless ($org); + + # now we can construct the uri object + my $uri = $cstore + ->request( 'open-ils.cstore.direct.asset.uri.search' => { label => $label, href => $href, use_restriction => $use, active => 't' } ) + ->gather(1); + + if (!$uri) { + $uri = Fieldmapper::asset::uri->new; + $uri->isnew( 1 ); + $uri->id( $$max_uri++ ); + $uri->label($label); + $uri->href($href); + $uri->active('t'); + $uri->use_restriction($use); + } + + # see if we need to create a call number + my $cn = $cn_cache{$org->id}; + $cn = $cn->clone if ($cn); + $cn->clear_isnew if ($cn); + + $cn ||= $cstore + ->request( 'open-ils.cstore.direct.asset.call_number.search' => { owning_lib => $org->id, record => $recid, label => '##URI##' } ) + ->gather(1); + + if (!$cn) { + $cn = Fieldmapper::asset::call_number->new; + $cn->isnew( 1 ); + $cn->deleted('f'); + $cn->id( $$max_cn++ ); + $cn->owning_lib( $org->id ); + $cn->record( $recid ); + $cn->create_date( 'now' ); + $cn->creator( $rec->creator ); + $cn->editor( $rec->editor ); + $cn->edit_date( 'now' ); + $cn->label( '##URI##' ); + } + + $cn_cache{$org->id} = $cn; + + push @objects, { uri => $uri, call_number => $cn }; + } + + $log->debug("Returning ".scalar(@objects)." URI nodes for record $recid"); + $cstore->disconnect; + return @objects; +} + +sub get_uris_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + OpenILS::Application::Ingest->post_init(); + my $r = OpenSRF::AppSession + ->create('open-ils.cstore') + ->request( "open-ils.cstore.direct.biblio.record_entry.retrieve" => $rec ) + ->gather(1); + + return undef unless ($r and $r->marc); + + $client->respond($_) for (_extract_856_uris($r)); + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.856_uri.record", + method => "get_uris_record", + api_level => 1, + argc => 1, + stream => 1, +); + +sub get_uris_object { + my $self = shift; + my $client = shift; + my $obj = shift; + my $max_cn = shift; + my $max_uri = shift; + + return undef unless ($obj and $obj->marc); + + $client->respond($_) for (_extract_856_uris($obj, \$max_cn, \$max_uri)); + return undef; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.856_uri.object", + method => "get_uris_object", + api_level => 1, + argc => 1, + stream => 1, +); + + +# -------------------------------------------------------------------------------- +# Fingerprinting + +package OpenILS::Application::Ingest::Biblio::Fingerprint; +use base qw/OpenILS::Application::Ingest/; +use Unicode::Normalize; +use OpenSRF::EX qw/:try/; + +sub biblio_fingerprint_record { + my $self = shift; + my $client = shift; + my $rec = shift; + + OpenILS::Application::Ingest->post_init(); + + my $r = OpenSRF::AppSession + ->create('open-ils.cstore') + ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec ) + ->gather(1); + + return undef unless ($r and $r->marc); + + my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc); + $log->debug("Returning [$fp] as fingerprint for record $rec", INFO); + $fp->{quality} = int($fp->{quality}); + return $fp; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.fingerprint.record", + method => "biblio_fingerprint_record", + api_level => 1, + argc => 1, +); + +our $fp_script; +sub biblio_fingerprint { + my $self = shift; + my $client = shift; + my $xml = OpenILS::Application::AppUtils->entityize(shift); + + $log->internal("Got MARC [$xml]"); + + if(!$fp_script) { + my @pfx = ( "apps", "open-ils.ingest","app_settings" ); + my $conf = OpenSRF::Utils::SettingsClient->new; + + my $libs = $conf->config_value(@pfx, 'script_path'); + my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint'); + my $script_libs = (ref($libs)) ? $libs : [$libs]; + + $log->debug("Loading script $script_file for biblio fingerprinting..."); + + $fp_script = new OpenILS::Utils::ScriptRunner + ( file => $script_file, + paths => $script_libs, + reset_count => 100 ); + } + + $fp_script->insert('environment' => {marc => $xml} => 1); + + my $res = $fp_script->run || ($log->error( "Fingerprint script died! $@" ) && return undef); + $log->debug("Script for biblio fingerprinting completed successfully..."); + + return $res; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.fingerprint.xml", + method => "biblio_fingerprint", + api_level => 1, + argc => 1, +); + +our $rd_script; +sub biblio_descriptor { + my $self = shift; + my $client = shift; + my $xml = OpenILS::Application::AppUtils->entityize(shift); + + $log->internal("Got MARC [$xml]"); + + if(!$rd_script) { + my @pfx = ( "apps", "open-ils.ingest","app_settings" ); + my $conf = OpenSRF::Utils::SettingsClient->new; + + my $libs = $conf->config_value(@pfx, 'script_path'); + my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor'); + my $script_libs = (ref($libs)) ? $libs : [$libs]; + + $log->debug("Loading script $script_file for biblio descriptor extraction..."); + + $rd_script = new OpenILS::Utils::ScriptRunner + ( file => $script_file, + paths => $script_libs, + reset_count => 100 ); + } + + $log->debug("Setting up environment for descriptor extraction script..."); + $rd_script->insert('environment.marc' => $xml => 1); + $log->debug("Environment building complete..."); + + my $res = $rd_script->run || ($log->error( "Descriptor script died! $@" ) && return undef); + $log->debug("Script for biblio descriptor extraction completed successfully"); + + my $d1 = $res->date1; + if ($d1 && $d1 ne ' ') { + $d1 =~ tr/ux/00/; + $res->date1( $d1 ); + } + + my $d2 = $res->date2; + if ($d2 && $d2 ne ' ') { + $d2 =~ tr/ux/99/; + $res->date2( $d2 ); + } + + return $res; +} +__PACKAGE__->register_method( + api_name => "open-ils.ingest.descriptor.xml", + method => "biblio_descriptor", + api_level => 1, + argc => 1, +); + + +1; + +# vim:et:ts=4:sw=4: diff --git a/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm b/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm new file mode 100644 index 0000000000..055266a8fe --- /dev/null +++ b/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm @@ -0,0 +1,89 @@ +{ # Every driver needs to provide a 'compile()' method to OpenILS::Application::Storage::FTS. + # If that driver wants to support FTI, that is... + #------------------------------------------------------------------------------- + package OpenILS::Application::Storage::FTS; + use OpenSRF::Utils::Logger qw/:level/; + use Unicode::Normalize; + my $log = 'OpenSRF::Utils::Logger'; + + sub compile { + my $self = shift; + my $class = shift; + my $term = NFD(shift()); + + $log->debug("Raw term: $term",DEBUG); + $log->debug("Search class: $class",DEBUG); + + $term =~ s/\&//go; + $term =~ s/\|//go; + + $self = ref($self) || $self; + $self = bless {} => $self; + $self->{class} = $class; + + $term =~ s/(\pM+)//gos; + $term =~ s/(\b\.\b)//gos; + + # hack to normalize ratio-like strings + while ($term =~ /\b\d{1}:[, ]?\d+(?:[ ,]\d+[^:])+/o) { + $term = $` . join ('', split(/[, ]/, $&)) . $'; + } + + $self->decompose($term); + + my $newterm = ''; + $newterm = join('&', $self->words) if ($self->words); + + if (@{$self->nots}) { + $newterm = '('.$newterm.')&' if ($newterm); + $newterm .= '!('. join('|', $self->nots) . ')'; + } + + $log->debug("Compiled term is [$newterm]", DEBUG); + $newterm = OpenILS::Application::Storage::Driver::Pg->quote($newterm); + $log->debug("Quoted term is [$newterm]", DEBUG); + + $self->{fts_query} = ["to_tsquery('$$self{class}',$newterm)"]; + $self->{fts_query_nots} = []; + $self->{fts_op} = '@@'; + $self->{text_col} = shift; + $self->{fts_col} = shift; + + return $self; + } + + sub sql_where_clause { + my $self = shift; + my $column = $self->fts_col; + my @output; + + my @ranks; + for my $fts ( $self->fts_query ) { + push @output, join(' ', $self->fts_col, $self->{fts_op}, $fts); + push @ranks, "rank($column, $fts)"; + } + $self->{fts_rank} = \@ranks; + + my $phrase_match = $self->sql_exact_phrase_match(); + return join(' AND ', @output) . $phrase_match; + } + + sub sql_exact_phrase_match { + my $self = shift; + my $column = $self->text_col; + my $output = ''; + for my $phrase ( $self->phrases ) { + $phrase =~ s/\*/\\*/go; + $phrase =~ s/\./\\./go; + $phrase =~ s/'/\\'/go; + $phrase =~ s/\s+/\\s+/go; + $log->debug("Adding phrase [$phrase] to the match list", DEBUG); + $output .= " AND $column ~* \$\$(^|\\W+)$phrase(\\W+|\$)\$\$"; + } + $log->debug("Phrase list is [$output]", DEBUG); + return $output; + } + +} + +1; -- 2.11.0