Link checker: DB layer and similar changes for URL Verification
authorMike Rylander <mrylander@gmail.com>
Mon, 6 Aug 2012 19:51:04 +0000 (15:51 -0400)
committerMike Rylander <mrylander@gmail.com>
Thu, 14 Feb 2013 19:19:16 +0000 (14:19 -0500)
Schema and IDL changes for URL Verification functionality
Start building URL-Validation related functions
Add Rose::URI to the CPAN_MODULES list
Add ON INSERT trigger to parse the URL as it is added

Signed-off-by: Mike Rylander <mrylander@gmail.com>
Open-ILS/examples/fm_IDL.xml
Open-ILS/src/extras/Makefile.install
Open-ILS/src/sql/Pg/075.schema.url_verify.sql [new file with mode: 0644]
Open-ILS/src/sql/Pg/076.functions.url_verify.sql [new file with mode: 0644]
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql [new file with mode: 0644]
Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql [new file with mode: 0644]

index a7a765d..c774122 100644 (file)
@@ -19,6 +19,56 @@ You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 
+############################
+          TEMPLATE
+############################
+
+    <class
+        id=""
+        controller="open-ils.cstore open-ils.pcrud"
+        oils_obj:fieldmapper=""
+        oils_persist:tablename=""
+        reporter:label=""
+        oils_persist:field_safe=""
+        oils_persist:virtual=""
+        oils_persist:readonly=""
+        reporter:core=""
+    >
+
+        <oils_persist:source_definition><![CDATA[
+            SELECT * FROM foo
+        ]]></oils_persist:source_definition>
+
+        <fields oils_persist:primary="" oils_persist:sequence="">
+            <field reporter:label="ID" name="id" reporter:datatype="id"/>
+                       <field reporter:label="" name="" reporter:datatype="org_unit"/>
+            <field reporter:label="" name="" reporter:datatype="text"/>
+            <field reporter:label="" name="" reporter:datatype="link"/>
+            <field reporter:label="" name="" reporter:datatype="timestamp"/>
+            <field reporter:label="" name="" reporter:datatype="money"/>
+            <field reporter:label="" name="" reporter:datatype="bool"/>
+            <field reporter:label="" name="" reporter:datatype="int" oils_obj:required="true"/>
+            <field reporter:label="" name="" reporter:datatype="int" oils_persist:virtual="true"/>
+        </fields>
+
+        <links>
+            <link field="" reltype="has_a" key="" map="" class=""/>
+        </links>
+
+        <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
+            <actions>
+                <create permission="" global_required="true"/>
+                <retrieve/>
+                <update permission="" context_field=""/>
+                <delete permission="">
+                    <context link="" field=""/>
+                </delete>
+            </actions>
+        </permacrud>
+
+    </class>
+
+
 -->
 
 <IDL xmlns="http://opensrf.org/spec/IDL/base/v1" xmlns:idl="http://opensrf.org/spec/IDL/base/v1" xmlns:oils_persist="http://open-ils.org/spec/opensrf/IDL/persistence/v1" xmlns:oils_obj="http://open-ils.org/spec/opensrf/IDL/objects/v1" xmlns:reporter="http://open-ils.org/spec/opensrf/IDL/reporter/v1" xmlns:permacrud="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
@@ -9287,6 +9337,247 @@ SELECT  usr,
                </permacrud>
        </class>
 
+    <class
+        id="uvs"
+        oils_obj:fieldmapper="url_verify::session"
+        oils_persist:tablename="url_verify.session"
+        reporter:label="URL Verification Session"
+        controller="open-ils.cstore open-ils.pcrud"
+    >
+        <!-- Field order is significant and is kept as-is; selectors and attempts are virtual (not stored columns). -->
+        <fields oils_persist:sequence="url_verify.session_id_seq" oils_persist:primary="id">
+            <field name="id" reporter:label="ID" reporter:datatype="id"/>
+            <field name="name" reporter:label="Name" reporter:datatype="text" oils_obj:required="true"/>
+            <field name="owning_lib" reporter:label="Owning Library" reporter:datatype="org_unit" oils_obj:required="true"/>
+            <field name="creator" reporter:label="Creator" reporter:datatype="link" oils_obj:required="true"/>
+            <field name="container" reporter:label="Record Container" reporter:datatype="link" oils_obj:required="true"/>
+            <field name="create_time" reporter:label="Create Time" reporter:datatype="timestamp"/>
+            <field name="search" reporter:label="Search Constraints" reporter:datatype="text" oils_obj:required="true"/>
+            <field name="selectors" reporter:label="URL Selectors" reporter:datatype="link" oils_persist:virtual="true"/>
+            <field name="attempts" reporter:label="Verification Attempts" reporter:datatype="link" oils_persist:virtual="true"/>
+        </fields>
+
+        <!-- The two has_many links are keyed on the "session" field of the remote class (uvus / uvva). -->
+        <links>
+            <link field="owning_lib" class="aou" key="id" reltype="has_a" map=""/>
+            <link field="creator" class="au" key="id" reltype="has_a" map=""/>
+            <link field="container" class="cbreb" key="id" reltype="has_a" map=""/>
+            <link field="selectors" class="uvus" key="session" reltype="has_many" map=""/>
+            <link field="attempts" class="uvva" key="session" reltype="has_many" map=""/>
+        </links>
+
+        <!-- Every action requires ADMIN_URL_VERIFY, with owning_lib as the permission context field. -->
+        <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
+            <actions>
+                <create context_field="owning_lib" permission="ADMIN_URL_VERIFY"/>
+                <retrieve context_field="owning_lib" permission="ADMIN_URL_VERIFY"/>
+                <update context_field="owning_lib" permission="ADMIN_URL_VERIFY"/>
+                <delete context_field="owning_lib" permission="ADMIN_URL_VERIFY"/>
+            </actions>
+        </permacrud>
+
+    </class>
+
+    <class
+        id="uvus"
+        controller="open-ils.cstore open-ils.pcrud"
+        oils_obj:fieldmapper="url_verify::url_selector"
+        oils_persist:tablename="url_verify.url_selector"
+        reporter:label="URL Verification URL Selector"
+    >
+        <!-- One XPath selector belonging to a verification session (uvs); "urls" is a virtual collection. -->
+        <fields oils_persist:primary="id" oils_persist:sequence="url_verify.url_selector_id_seq">
+            <field reporter:label="ID" name="id" reporter:datatype="id"/>
+            <field reporter:label="XPath" name="xpath" reporter:datatype="text" oils_obj:required="true"/>
+            <field reporter:label="Session" name="session" reporter:datatype="link" oils_obj:required="true"/>
+            <field reporter:label="URLs" name="urls" reporter:datatype="link" oils_persist:virtual="true"/>
+        </fields>
+
+        <links>
+            <link field="session" reltype="has_a" key="id" map="" class="uvs"/>
+            <!-- has_many: key names the field on uvu that points back at this selector (uvu.url_selector),
+                 the same way uvs keys its has_many links on "session". It was key="id", which would have
+                 joined selectors to URLs by matching primary keys. -->
+            <link field="urls" reltype="has_many" key="url_selector" map="" class="uvu"/>
+        </links>
+
+        <!-- Permission context is reached through the parent session's owning_lib. -->
+        <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
+            <actions>
+                <create permission="ADMIN_URL_VERIFY">
+                    <context link="session" field="owning_lib"/>
+                </create>
+                <retrieve permission="ADMIN_URL_VERIFY">
+                    <context link="session" field="owning_lib"/>
+                </retrieve>
+                <update permission="ADMIN_URL_VERIFY">
+                    <context link="session" field="owning_lib"/>
+                </update>
+                <delete permission="ADMIN_URL_VERIFY">
+                    <context link="session" field="owning_lib"/>
+                </delete>
+            </actions>
+        </permacrud>
+
+    </class>
+
+    <class
+        id="uvu"
+        controller="open-ils.cstore open-ils.pcrud"
+        oils_obj:fieldmapper="url_verify::url"
+        oils_persist:tablename="url_verify.url"
+        reporter:label="URL Verification URL"
+    >
+        <!-- Fields mirror the columns of url_verify.url in table order. username, password and port
+             were missing here although the table defines them and the insert trigger fills them in. -->
+        <fields oils_persist:primary="id" oils_persist:sequence="url_verify.url_id_seq">
+            <field reporter:label="ID" name="id" reporter:datatype="id"/>
+            <field reporter:label="Redirected From" name="redirect_from" reporter:datatype="link"/>
+            <field reporter:label="Container Item" name="item" reporter:datatype="link" oils_obj:required="true"/>
+            <field reporter:label="URL Selector" name="url_selector" reporter:datatype="link"/>
+            <field reporter:label="Tag" name="tag" reporter:datatype="text"/>
+            <field reporter:label="Subfield" name="subfield" reporter:datatype="text"/>
+            <field reporter:label="Ordinal Position" name="ord" reporter:datatype="int"/>
+            <field reporter:label="URL" name="full_url" reporter:datatype="text"/>
+            <field reporter:label="Scheme" name="scheme" reporter:datatype="text"/>
+            <field reporter:label="Username" name="username" reporter:datatype="text"/>
+            <field reporter:label="Password" name="password" reporter:datatype="text"/>
+            <field reporter:label="Host" name="host" reporter:datatype="text"/>
+            <field reporter:label="Domain" name="domain" reporter:datatype="text"/>
+            <field reporter:label="TLD" name="tld" reporter:datatype="text"/>
+            <field reporter:label="Port" name="port" reporter:datatype="text"/>
+            <field reporter:label="Path" name="path" reporter:datatype="text"/>
+            <field reporter:label="Page" name="page" reporter:datatype="text"/>
+            <field reporter:label="Query" name="query" reporter:datatype="text"/>
+            <field reporter:label="Fragment" name="fragment" reporter:datatype="text"/>
+        </fields>
+
+        <links>
+            <link field="redirect_from" reltype="has_a" key="id" map="" class="uvu"/>
+            <link field="item" reltype="has_a" key="id" map="" class="cbrebi"/>
+            <link field="url_selector" reltype="has_a" key="id" map="" class="uvus"/>
+        </links>
+
+        <!-- Permission context is reached via url_selector, then session, to the session's owning_lib. -->
+        <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
+            <actions>
+                <create permission="ADMIN_URL_VERIFY">
+                    <context link="url_selector session" field="owning_lib"/>
+                </create>
+                <retrieve permission="ADMIN_URL_VERIFY">
+                    <context link="url_selector session" field="owning_lib"/>
+                </retrieve>
+                <update permission="ADMIN_URL_VERIFY">
+                    <context link="url_selector session" field="owning_lib"/>
+                </update>
+                <delete permission="ADMIN_URL_VERIFY">
+                    <context link="url_selector session" field="owning_lib"/>
+                </delete>
+            </actions>
+        </permacrud>
+
+    </class>
+
+    <class
+        id="uvva"
+        oils_obj:fieldmapper="url_verify::verification_attempt"
+        oils_persist:tablename="url_verify.verification_attempt"
+        reporter:label="URL Verification Attempt"
+        controller="open-ils.cstore open-ils.pcrud"
+    >
+        <!-- One run of a verification session by a user, with its start and finish timestamps. -->
+        <fields oils_persist:sequence="url_verify.verification_attempt_id_seq" oils_persist:primary="id">
+            <field name="id" reporter:label="ID" reporter:datatype="id"/>
+            <field name="usr" reporter:label="User" reporter:datatype="link"/>
+            <field name="session" reporter:label="Session" reporter:datatype="link"/>
+            <field name="start_time" reporter:label="Start Time" reporter:datatype="timestamp"/>
+            <field name="finish_time" reporter:label="Finish Time" reporter:datatype="timestamp"/>
+        </fields>
+
+        <links>
+            <link field="session" class="uvs" key="id" reltype="has_a" map=""/>
+            <link field="usr" class="au" key="id" reltype="has_a" map=""/>
+        </links>
+
+        <!-- Permission context is reached through the parent session's owning_lib. -->
+        <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
+            <actions>
+                <create permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="session"/>
+                </create>
+                <retrieve permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="session"/>
+                </retrieve>
+                <update permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="session"/>
+                </update>
+                <delete permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="session"/>
+                </delete>
+            </actions>
+        </permacrud>
+
+    </class>
+
+    <class
+        id="uvuv"
+        oils_obj:fieldmapper="url_verify::url_verification"
+        oils_persist:tablename="url_verify.url_verification"
+        reporter:label="URL Verification"
+        controller="open-ils.cstore open-ils.pcrud"
+    >
+        <!-- Outcome of checking one URL during one attempt: request/result times, result code and text, optional redirect target. -->
+        <fields oils_persist:sequence="url_verify.url_verification_id_seq" oils_persist:primary="id">
+            <field name="id" reporter:label="ID" reporter:datatype="id"/>
+            <field name="url" reporter:label="URL" reporter:datatype="link"/>
+            <field name="attempt" reporter:label="Attempt" reporter:datatype="link"/>
+            <field name="req_time" reporter:label="Request Time" reporter:datatype="timestamp"/>
+            <field name="res_time" reporter:label="Result Time" reporter:datatype="timestamp"/>
+            <field name="res_code" reporter:label="Result Code" reporter:datatype="int"/>
+            <field name="res_text" reporter:label="Result Text" reporter:datatype="text"/>
+            <field name="redirect_to" reporter:label="Redirected To" reporter:datatype="link"/>
+        </fields>
+
+        <links>
+            <link field="url" class="uvu" key="id" reltype="has_a" map=""/>
+            <link field="attempt" class="uvva" key="id" reltype="has_a" map=""/>
+            <link field="redirect_to" class="uvu" key="id" reltype="has_a" map=""/>
+        </links>
+
+        <!-- Permission context is reached via attempt, then session, to the session's owning_lib. -->
+        <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
+            <actions>
+                <create permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="attempt session"/>
+                </create>
+                <retrieve permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="attempt session"/>
+                </retrieve>
+                <update permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="attempt session"/>
+                </update>
+                <delete permission="ADMIN_URL_VERIFY">
+                    <context field="owning_lib" link="attempt session"/>
+                </delete>
+            </actions>
+        </permacrud>
+
+    </class>
+
+    <class
+        id="uvfs"
+        controller="open-ils.cstore open-ils.pcrud"
+        oils_obj:fieldmapper="url_verify::filter_set"
+        oils_persist:tablename="url_verify.filter_set"
+        reporter:label="URL Verification Filter Set"
+    >
+        <!-- The sequence must be the one behind url_verify.filter_set.id (a SERIAL column, so
+             filter_set_id_seq); it previously pointed at url_verification_id_seq, the sequence of a
+             different table. -->
+        <fields oils_persist:primary="id" oils_persist:sequence="url_verify.filter_set_id_seq">
+            <field reporter:label="ID" name="id" reporter:datatype="id"/>
+            <field reporter:label="Name" name="name" reporter:datatype="text"/>
+            <field reporter:label="Owning Library" name="owning_lib" reporter:datatype="org_unit"/>
+            <field reporter:label="Creator" name="creator" reporter:datatype="link"/>
+            <field reporter:label="Create Time" name="create_time" reporter:datatype="timestamp"/>
+            <field reporter:label="Filter" name="filter" reporter:datatype="text"/>
+        </fields>
+
+        <links>
+            <link field="owning_lib" reltype="has_a" key="id" map="" class="aou"/>
+            <link field="creator" reltype="has_a" key="id" map="" class="au"/>
+        </links>
+
+        <!-- Every action requires ADMIN_URL_VERIFY, with owning_lib as the permission context field. -->
+        <permacrud xmlns="http://open-ils.org/spec/opensrf/IDL/permacrud/v1">
+            <actions>
+                <create permission="ADMIN_URL_VERIFY" context_field="owning_lib"/>
+                <retrieve permission="ADMIN_URL_VERIFY" context_field="owning_lib"/>
+                <update permission="ADMIN_URL_VERIFY" context_field="owning_lib"/>
+                <delete permission="ADMIN_URL_VERIFY" context_field="owning_lib"/>
+            </actions>
+        </permacrud>
+
+    </class>
+
        <class id="cmrtm" controller="open-ils.cstore open-ils.pcrud" oils_obj:fieldmapper="config::marc21_rec_type_map" oils_persist:tablename="config.marc21_rec_type_map" reporter:label="MARC21 Record Type Map" oils_persist:field_safe="true">
                <fields oils_persist:primary="code">
                        <field reporter:label="Code" name="code" reporter:datatype="id"/>
index 60c8d66..344d417 100644 (file)
@@ -219,7 +219,8 @@ CPAN_MODULES = \
        Library::CallNumber::LC \
        Net::Z3950::Simple2ZOOM \
        Template::Plugin::POSIX \
-       SRU
+       SRU \
+       Rose::URI
 
 # More chronically unpackaged CPAN modules (available in Squeeze though)
 CPAN_MODULES_MORE = \
diff --git a/Open-ILS/src/sql/Pg/075.schema.url_verify.sql b/Open-ILS/src/sql/Pg/075.schema.url_verify.sql
new file mode 100644 (file)
index 0000000..753c769
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2012  Equinox Software, Inc.
+ * Mike Rylander <miker@esilibrary.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+-- Build the url_verify schema inside one transaction (committed at the end of this file).
+BEGIN;
+
+-- Start from a clean slate: an existing url_verify schema and everything depending on it is dropped.
+DROP SCHEMA IF EXISTS url_verify CASCADE;
+
+CREATE SCHEMA url_verify;
+
+-- A named verification session owned by an org unit, tied to a record bucket and a search string.
+-- Session names are unique within an owning library.
+CREATE TABLE url_verify.session (
+    id          SERIAL      PRIMARY KEY,
+    name        TEXT        NOT NULL,
+    owning_lib  INTEGER     NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED,
+    creator     INTEGER     NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
+    container   INTEGER     NOT NULL REFERENCES container.biblio_record_entry_bucket (id) DEFERRABLE INITIALLY DEFERRED,
+    create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    search      TEXT        NOT NULL,
+    CONSTRAINT name_once_per_lib UNIQUE (name, owning_lib)
+);
+
+-- An XPath expression attached to a session; a given xpath may appear only once per session.
+CREATE TABLE url_verify.url_selector (
+    id      SERIAL  PRIMARY KEY,
+    xpath   TEXT    NOT NULL,
+    session INTEGER NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED,
+    CONSTRAINT tag_once_per_sess UNIQUE (xpath, session)
+);
+
+-- A URL to verify: either one found in a bucket item by a selector, or one recorded as the
+-- target of a redirect (redirect_from set).
+--
+-- item, url_selector, tag, subfield and ord are deliberately nullable at the column level.
+-- The redirect_or_from_item CHECK below requires all five unless redirect_from is set. With
+-- column-level NOT NULL the CHECK could never fail, and rows carrying only redirect_from
+-- could not be inserted.
+CREATE TABLE url_verify.url (
+    id              SERIAL  PRIMARY KEY,
+    redirect_from   INT     REFERENCES url_verify.url(id) DEFERRABLE INITIALLY DEFERRED,
+    item            INT     REFERENCES container.biblio_record_entry_bucket_item (id) DEFERRABLE INITIALLY DEFERRED,
+    url_selector    INT     REFERENCES url_verify.url_selector (id) DEFERRABLE INITIALLY DEFERRED,
+    tag             TEXT,
+    subfield        TEXT,
+    ord             INT,    -- ordinal position of this url within the record as found by url_selector, for later update
+    full_url        TEXT    NOT NULL,
+    -- The remaining columns hold the parsed components of full_url.
+    scheme          TEXT,
+    username        TEXT,
+    password        TEXT,
+    host            TEXT,
+    domain          TEXT,
+    tld             TEXT,
+    port            TEXT,
+    path            TEXT,
+    page            TEXT,
+    query           TEXT,
+    fragment        TEXT,
+    CONSTRAINT redirect_or_from_item CHECK (
+        redirect_from IS NOT NULL OR (
+            item         IS NOT NULL AND
+            url_selector IS NOT NULL AND
+            tag          IS NOT NULL AND
+            subfield     IS NOT NULL AND
+            ord          IS NOT NULL
+        )
+    )
+);
+
+-- One run of a session by a user; finish_time stays NULL until a value is recorded.
+CREATE TABLE url_verify.verification_attempt (
+    id          SERIAL      PRIMARY KEY,
+    usr         INTEGER     NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
+    session     INTEGER     NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED,
+    start_time  TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    finish_time TIMESTAMPTZ
+);
+-- Result of checking one URL within one attempt. res_code, when present, must lie in 100..599.
+CREATE TABLE url_verify.url_verification (
+    id          SERIAL      PRIMARY KEY,
+    url         INTEGER     NOT NULL REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED,
+    attempt     INTEGER     NOT NULL REFERENCES url_verify.verification_attempt (id) DEFERRABLE INITIALLY DEFERRED,
+    req_time    TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    res_time    TIMESTAMPTZ,
+    res_code    INTEGER     CHECK (res_code >= 100 AND res_code <= 599),
+    res_text    TEXT,
+    redirect_to INTEGER     REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED -- if redirected
+);
+
+-- A named, library-owned filter definition; names are unique within an owning library.
+--
+-- The UNIQUE constraint needs its own name. PostgreSQL backs it with an index of the same
+-- name, and index names are unique per schema. url_verify.session already owns
+-- "name_once_per_lib", so reusing that name here made this CREATE TABLE fail with
+-- 'relation "name_once_per_lib" already exists'.
+CREATE TABLE url_verify.filter_set (
+    id          SERIAL                      PRIMARY KEY,
+    name        TEXT                        NOT NULL,
+    owning_lib  INT                         NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED,
+    creator     INT                         NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
+    create_time TIMESTAMP WITH TIME ZONE    NOT NULL DEFAULT NOW(),
+    filter      TEXT                        NOT NULL,
+    CONSTRAINT uvfs_name_once_per_lib UNIQUE (name, owning_lib)
+);
+COMMIT;
+
diff --git a/Open-ILS/src/sql/Pg/076.functions.url_verify.sql b/Open-ILS/src/sql/Pg/076.functions.url_verify.sql
new file mode 100644 (file)
index 0000000..23dedab
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2012  Equinox Software, Inc.
+ * Mike Rylander <miker@esilibrary.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+BEGIN;
+
+-- url_verify.parse_url(url_in): split a URL string into its components using Rose::URI and
+-- return them as a url_verify.url row. Only full_url and the parsed component columns are
+-- populated here; no other column is assigned.
+CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$
+
+use Rose::URI;
+
+my $url_in = shift;
+my $url = Rose::URI->new($url_in);
+
+# Each listed component is read from the Rose::URI accessor of the same name.
+my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/;
+
+$parts{full_url} = $url_in;
+# domain: host with its first dot-terminated label removed (unchanged if host contains no dot)
+($parts{domain} = $parts{host}) =~ s/^[^.]+\.//;
+# tld: domain with its run of dot-terminated labels removed, normally leaving the last label
+($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//;
+# page: path with everything up to and including the last slash removed
+($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##;
+
+# A hash reference keyed by column name is returned as the composite result.
+return \%parts;
+
+$$ LANGUAGE PLPERLU;
+
+-- Trigger function: run NEW.full_url through url_verify.parse_url() and copy every parsed
+-- component onto the row being inserted. Columns other than those components are left alone.
+CREATE OR REPLACE FUNCTION url_verify.ingest_url () RETURNS TRIGGER AS $$
+DECLARE
+    parsed url_verify.url%ROWTYPE;
+BEGIN
+    SELECT * INTO parsed FROM url_verify.parse_url(NEW.full_url);
+
+    -- scheme and credentials
+    NEW.scheme   := parsed.scheme;
+    NEW.username := parsed.username;
+    NEW.password := parsed.password;
+
+    -- host and the pieces derived from it
+    NEW.host     := parsed.host;
+    NEW.domain   := parsed.domain;
+    NEW.tld      := parsed.tld;
+    NEW.port     := parsed.port;
+
+    -- resource location
+    NEW.path     := parsed.path;
+    NEW.page     := parsed.page;
+    NEW.query    := parsed.query;
+    NEW.fragment := parsed.fragment;
+
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Parse each URL as it is added; fires on INSERT only.
+CREATE TRIGGER ingest_url_tgr
+    BEFORE INSERT
+    ON url_verify.url
+    FOR EACH ROW
+    EXECUTE PROCEDURE url_verify.ingest_url();
+
+COMMIT;
+
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.url_verify.sql
new file mode 100644 (file)
index 0000000..7850b49
--- /dev/null
@@ -0,0 +1,82 @@
+-- Upgrade script: create the url_verify schema and its tables.
+-- Everything runs in a single transaction so a failure part-way through leaves no
+-- half-built schema behind.
+BEGIN;
+
+-- NOTE(review): this drops any existing url_verify schema together with its data.
+-- Confirm that is intended for an upgrade script.
+DROP SCHEMA IF EXISTS url_verify CASCADE;
+
+CREATE SCHEMA url_verify;
+
+-- A named verification session owned by an org unit; names are unique per owning library.
+CREATE TABLE url_verify.session (
+    id          SERIAL                      PRIMARY KEY,
+    name        TEXT                        NOT NULL,
+    owning_lib  INT                         NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED,
+    creator     INT                         NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
+    container   INT                         NOT NULL REFERENCES container.biblio_record_entry_bucket (id) DEFERRABLE INITIALLY DEFERRED,
+    create_time TIMESTAMP WITH TIME ZONE    NOT NULL DEFAULT NOW(),
+    search      TEXT                        NOT NULL,
+    CONSTRAINT name_once_per_lib UNIQUE (name, owning_lib)
+);
+
+-- An XPath expression attached to a session; a given xpath may appear only once per session.
+CREATE TABLE url_verify.url_selector (
+    id      SERIAL  PRIMARY KEY,
+    xpath   TEXT    NOT NULL,
+    session INT     NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED,
+    CONSTRAINT tag_once_per_sess UNIQUE (xpath, session)
+);
+
+-- A URL to verify: found in a bucket item by a selector, or recorded as a redirect target.
+-- item, url_selector, tag, subfield and ord are nullable at the column level. The
+-- redirect_or_from_item CHECK requires all five unless redirect_from is set; with
+-- column-level NOT NULL the CHECK could never fail and redirect-only rows were rejected.
+CREATE TABLE url_verify.url (
+    id              SERIAL  PRIMARY KEY,
+    redirect_from   INT     REFERENCES url_verify.url(id) DEFERRABLE INITIALLY DEFERRED,
+    item            INT     REFERENCES container.biblio_record_entry_bucket_item (id) DEFERRABLE INITIALLY DEFERRED,
+    url_selector    INT     REFERENCES url_verify.url_selector (id) DEFERRABLE INITIALLY DEFERRED,
+    tag             TEXT,
+    subfield        TEXT,
+    ord             INT,    -- ordinal position of this url within the record as found by url_selector, for later update
+    full_url        TEXT    NOT NULL,
+    -- The remaining columns hold the parsed components of full_url.
+    scheme          TEXT,
+    username        TEXT,
+    password        TEXT,
+    host            TEXT,
+    domain          TEXT,
+    tld             TEXT,
+    port            TEXT,
+    path            TEXT,
+    page            TEXT,
+    query           TEXT,
+    fragment        TEXT,
+    CONSTRAINT redirect_or_from_item CHECK (
+        redirect_from IS NOT NULL OR (
+            item         IS NOT NULL AND
+            url_selector IS NOT NULL AND
+            tag          IS NOT NULL AND
+            subfield     IS NOT NULL AND
+            ord          IS NOT NULL
+        )
+    )
+);
+
+-- One run of a session by a user; finish_time stays NULL until a value is recorded.
+CREATE TABLE url_verify.verification_attempt (
+    id          SERIAL                      PRIMARY KEY,
+    usr         INT                         NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
+    session     INT                         NOT NULL REFERENCES url_verify.session (id) DEFERRABLE INITIALLY DEFERRED,
+    start_time  TIMESTAMP WITH TIME ZONE    NOT NULL DEFAULT NOW(),
+    finish_time TIMESTAMP WITH TIME ZONE
+);
+
+-- Result of checking one URL within one attempt. res_code, when present, must lie in 100..599.
+CREATE TABLE url_verify.url_verification (
+    id          SERIAL                      PRIMARY KEY,
+    url         INT                         NOT NULL REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED,
+    attempt     INT                         NOT NULL REFERENCES url_verify.verification_attempt (id) DEFERRABLE INITIALLY DEFERRED,
+    req_time    TIMESTAMP WITH TIME ZONE    NOT NULL DEFAULT NOW(),
+    res_time    TIMESTAMP WITH TIME ZONE,
+    res_code    INT                         CHECK (res_code BETWEEN 100 AND 599),
+    res_text    TEXT,
+    redirect_to INT                         REFERENCES url_verify.url (id) DEFERRABLE INITIALLY DEFERRED -- if redirected
+);
+
+-- A named, library-owned filter definition. Its UNIQUE constraint needs a name distinct from
+-- the one on url_verify.session: the backing index shares the constraint's name, and index
+-- names are unique per schema, so reusing "name_once_per_lib" made this statement fail.
+CREATE TABLE url_verify.filter_set (
+    id          SERIAL                      PRIMARY KEY,
+    name        TEXT                        NOT NULL,
+    owning_lib  INT                         NOT NULL REFERENCES actor.org_unit (id) DEFERRABLE INITIALLY DEFERRED,
+    creator     INT                         NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
+    create_time TIMESTAMP WITH TIME ZONE    NOT NULL DEFAULT NOW(),
+    filter      TEXT                        NOT NULL,
+    CONSTRAINT uvfs_name_once_per_lib UNIQUE (name, owning_lib)
+);
+
+COMMIT;
diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.functions.url_verify.sql
new file mode 100644 (file)
index 0000000..2087794
--- /dev/null
@@ -0,0 +1,22 @@
+-- Upgrade script: install the URL-parsing function and the insert trigger that applies it.
+-- The base install script (076.functions.url_verify.sql) creates parse_url, ingest_url and
+-- ingest_url_tgr. This script previously created only parse_url, so upgraded databases
+-- never populated the parsed URL columns.
+BEGIN;
+
+-- url_verify.parse_url(url_in): split a URL string into its components using Rose::URI and
+-- return them as a url_verify.url row. Only full_url and the parsed component columns are
+-- populated here.
+CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$
+
+use Rose::URI;
+
+my $url_in = shift;
+my $url = Rose::URI->new($url_in);
+
+# Each listed component is read from the Rose::URI accessor of the same name.
+my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/;
+
+$parts{full_url} = $url_in;
+# domain: host with its first dot-terminated label removed (unchanged if host contains no dot)
+($parts{domain} = $parts{host}) =~ s/^[^.]+\.//;
+# tld: domain with its run of dot-terminated labels removed, normally leaving the last label
+($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//;
+# page: path with everything up to and including the last slash removed
+($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##;
+
+return \%parts;
+
+$$ LANGUAGE PLPERLU;
+
+-- Trigger function: parse NEW.full_url and copy every parsed component onto the new row.
+CREATE OR REPLACE FUNCTION url_verify.ingest_url () RETURNS TRIGGER AS $$
+DECLARE
+    tmp_row url_verify.url%ROWTYPE;
+BEGIN
+    SELECT * INTO tmp_row FROM url_verify.parse_url(NEW.full_url);
+
+    NEW.scheme          := tmp_row.scheme;
+    NEW.username        := tmp_row.username;
+    NEW.password        := tmp_row.password;
+    NEW.host            := tmp_row.host;
+    NEW.domain          := tmp_row.domain;
+    NEW.tld             := tmp_row.tld;
+    NEW.port            := tmp_row.port;
+    NEW.path            := tmp_row.path;
+    NEW.page            := tmp_row.page;
+    NEW.query           := tmp_row.query;
+    NEW.fragment        := tmp_row.fragment;
+
+    RETURN NEW;
+END;
+$$ LANGUAGE PLPGSQL;
+
+-- CREATE TRIGGER has no OR REPLACE form here, so drop first to keep the script re-runnable
+-- like the CREATE OR REPLACE FUNCTION statements above.
+DROP TRIGGER IF EXISTS ingest_url_tgr ON url_verify.url;
+
+CREATE TRIGGER ingest_url_tgr
+    BEFORE INSERT ON url_verify.url
+    FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url();
+
+COMMIT;
+