root/main/trunk/greenstone2/perllib/gssql.pm @ 32580

Revision 32580, 28.4 KB (checked in by ak19, 9 months ago)

1. Support for a port parameter when connecting to the SQL DB. 2. GS SQL Plugout now also uses the verbosity member variable when instantiating the gssql object. 3. Since the DBI object has PrintError set to 1 on connection (and ShowErrorStatement set to 1 for more verbosity), which means an informative message is always printed on error or warning, there's no need for me to write warning statements everywhere a db statement/call fails. Removed these redundant warnings. 4. Don't want GS XML Plugout's debug outhandle passed to the two gssql::insert methods, as we don't want them to write debug information to the debug handle. That should only be for the XML stuff (whether groups are on or not); moreover, the debug outhandle can be set to the XSLT writer, in which case it makes even less sense for gssql to output info and error debug statements into it. gssql now sticks to STDERR for debug information.

Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
34
35##############################
36
37# TODO: add infrastructure for db_port, AutoCommit etc
38# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
39
40# + TODO: remove unnecessary warn() since PrintError is active
41# https://perldoc.perl.org/perlobj.html#Destructors
42
43# TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds
44# instead of always first checking for existence ourselves?
45##############################
46
47# singleton connection
# Process-wide singleton state, shared by every gssql object:
# the single DBI database handle; it stays undefined until a connection succeeds
# (on undef, see https://perlmaven.com/undef-and-defined-in-perl)
my $_dbh_instance;
# the number of gssql objects currently sharing that handle
my $ref_count = 0;
50
51# Need params_map keys:
52# - collection_name
53# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
54# we set the enc during connect at server level. Not sure whether other DB's support it at the
55# same levels.
56
# For connection to MySQL, need:
#  - db_driver, db_client_user, db_client_pwd, db_host, and optionally db_port
# So these will be parameterised, but in a hashmap, for just the connect method.
60
61# Parameterise (one or more methods may use them):
62# - db_name (which is the GS3 sitename)
63
# Constructor: builds a gssql object for one collection.
# No database connection is made here.
#
# Parameters:
#   $class      - the class name to bless the new object into
#   $params_map - hashref; the keys read here are
#                   collection_name (kept as is, and also sanitized into
#                                    the 'tablename_prefix' member),
#                   verbosity       (defaults to 1 only when not defined, so
#                                    that an explicit 0 is respected),
#                   db_encoding     (defaults to "utf8")
# Returns: the blessed object.
sub new
{
    my $class = shift(@_);

    my ($params_map) = @_;

    # library_url: to be specified on the cmdline if not using a GS-included web server
    # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3

    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
    # Making a shallow copy works, and can handle unknown params:
    #my $self = $params_map;

    # Only fall back to the default verbosity when none was given at all:
    # a plain "|| 1" would also turn an explicitly requested verbosity of 0 into 1.
    my $verbosity = $params_map->{'verbosity'};
    $verbosity = 1 unless defined $verbosity;

    # but being explicit for class params needed for MySQL:
    my $self = {
        'collection_name' => $params_map->{'collection_name'},
        'verbosity' => $verbosity
    };

    # The db_encoding option is presently not passed in to this constructor as parameter.
    # Placed here to indicate it's sort of optional.
    # Since docxml are all in utf8, the contents of the GS SQL database should be too,
    # So making utf8 the hidden default at present.
    $self->{'db_encoding'} = $params_map->{'db_encoding'} || "utf8";

    $self = bless($self, $class);

    # table names are derived from the collection name, made acceptable to the db
    $self->{'tablename_prefix'} = $self->sanitize_name($params_map->{'collection_name'});

    return $self;
}
95
96
97
98#################################
99
100# Database access related functions
101# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
102# https://www.guru99.com/insert-into.html
103
104# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
105# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
106# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
107# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
108# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
109# Unless they do a full rebuild, which will recreate the table from scratch?
110# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
111# But then should set AutoCommit to off on connection, and remember to commit every time
112
113#################
114# Database functions that use the perl DBI module (with the DBD driver module for mysql)
115#################
116
117################### BASIC DB OPERATIONS ##################
118
119# THE NEW DB FUNCTIONS
120# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
121# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
122
123# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
124
125
126
127# SINGLETON / GET INSTANCE PATTERN
128# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization
129# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa
130# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/
131
# Obtains the shared (singleton) database connection for this object.
# Overwrites the 'db_encoding' and 'verbosity' entries of the passed-in
# $params_map with this object's own values before handing the map on.
# The result of the connection attempt is stored in $self->{'db_handle'}.
# On success the count of objects sharing the connection is incremented
# and the handle is returned; otherwise undef is returned.
sub connect_to_db
{
    my ($self, $params_map) = @_;

    @{$params_map}{'db_encoding', 'verbosity'} = @{$self}{'db_encoding', 'verbosity'};

    # getting singleton (class method)
    my $handle = _get_connection_instance($params_map);
    $self->{'db_handle'} = $handle;

    return undef unless $handle;

    # one more gssql object now refers to the single db connection
    $ref_count++;
    return $handle;
}
147
148# SINGLETON METHOD #
149# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout?
# Returns the process-wide database handle, connecting first if there is none yet.
#
# Parameters:
#   $params_map - hashref; keys read here: db_encoding, db_driver (default "mysql"),
#                 db_client_user (default "root"), db_client_pwd, db_host
#                 (default "127.0.0.1"), db_port (optional), verbosity
# Returns: the DBI database handle, or 0 if the connection attempt failed.
# Once a connection exists, it is returned as is and $params_map is ignored.
sub _get_connection_instance
{
    #my $self= shift (@_); # singleton method doesn't use self, but callers don't need to know that
    my ($params_map) = @_;

    return $_dbh_instance if($_dbh_instance);

    # or make the connection

    # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient.
    # Declared and assigned in separate steps: "my $var = ... if COND" has undefined behaviour.
    # (The value is only needed by the commented out "set NAMES" alternative further below.)
    my $db_enc = $params_map->{'db_encoding'};
    $db_enc = "utf8mb4" if defined $db_enc && $db_enc eq "utf8";

    # these are the params for connecting to MySQL
    my $db_driver = $params_map->{'db_driver'} || "mysql";
    my $db_user = $params_map->{'db_client_user'} || "root";
    my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef and password was necessary,
                                     # we'll see a sensible error message when connect fails
    my $db_host = $params_map->{'db_host'} || "127.0.0.1";
        # localhost doesn't work for us, but 127.0.0.1 works
        # https://metacpan.org/pod/DBD::mysql
        # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
        # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
        # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
    my $db_port = $params_map->{'db_port'}; # leave as undef if unspecified,
                 # as our tests never used port anyway (must have internally
                 # defaulted to whatever default port is used for MySQL)


    #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
    # But don't provide db now - this allows checking the db exists later when loading the db
    my $connect_str = "dbi:$db_driver:host=$db_host";
    $connect_str .= ";port=$db_port" if $db_port;

    if($params_map->{'verbosity'}) {
        print STDERR "Away to make connection to $db_driver database with:\n";
        print STDERR " - hostname $db_host; username: $db_user";
        print STDERR "; and the provided password" if $db_pwd;
        print STDERR "\nAssuming the mysql server has been started with: --character_set_server=utf8mb4\n" if $db_driver eq "mysql";
    }

    my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
               {
                   ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
                   PrintError => 1, # on by default, but being explicit
                   RaiseError => 0, # off by default, but being explicit
                   AutoCommit => 1, # on by default, but being explicit
                   mysql_enable_utf8mb4 => 1 # tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
               });

    if(!$dbh) {
        # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps)
        return 0;
    }

    # set encoding https://metacpan.org/pod/DBD::mysql
    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
    # Setting the encoding at db server level: $dbh->do("set NAMES '" . $db_enc . "'");
    # HOWEVER:
    # It turned out insufficient setting the encoding to utf8, as that only supports utf8 chars that
    # need up to 3 bytes. We may need up to 4 bytes per utf8 character, e.g. chars with macron,
    # and for that, we need the encoding to be set to utf8mb4.
    # To set up a MySQL db to use utf8mb4 requires configuration on the server side too.
    # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc
    # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
    # To set up the db for utf8mb4, therefore,
    # the MySQL server needs to be configured for that char encoding by running the server as:
    # mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/Scratch/ak19/mysql/data --character_set_server=utf8mb4
    # AND when connecting to the server, we can either set mysql_enable_utf8mb4 => 1
    # as a connection option
    # OR we need to do both "set NAMES utf8mb4" AND "$dbh->{mysql_enable_utf8mb4} = 1;" after connecting
    #
    # Search results for DBI Set Names imply the "SET NAMES '<enc>'" command is mysql specific too,
    # so setting the mysql specific option during connection above as "mysql_enable_utf8mb4 => 1"
    # is no more objectionable. It has the advantage of cutting out the 2 extra lines of doing
    # set NAMES '<enc>' and $dbh->{mysql_enable_utf8mb4} = 1 here.
    # These lines may be preferred if more db_driver options are to be supported in future:
    # then a separate method called set_db_encoding($enc) can work out what db_driver we're using
    # and if mysql and enc=utf8, then it can do the following whereas it will issue other do stmts
    # for other db_drivers, see https://www.perlmonks.org/?node_id=259456:

    #my $stmt = "set NAMES '" . $db_enc . "'";
    #$dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication
    #$dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data

    # if we're here, then connection succeeded, store handle
    $_dbh_instance = $dbh;
    return $_dbh_instance;

}
240
241# Will disconnect if this instance of gssql holds the last reference to the db connection
# Releases this object's share of the singleton db connection.
# Disconnects from the db when the last counted reference is released.
# A call made while no connection is being counted is ignored. Otherwise the
# counter would drop below zero and, after the next connect, never get back
# to exactly 0, so the connection would never be closed.
sub finished {
    my $self = shift (@_);

    return if $ref_count <= 0;

    $ref_count--;
    if($ref_count == 0) {
        $self->force_disconnect_from_db();
    }
}
250
251# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
252# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
253# TODO: make sure to have committed or rolled back before disconnect
254# and that you've call finish() on statement handles if any fetch remnants remain
# Unconditionally closes the singleton db connection, if there is one,
# regardless of how many gssql objects still refer to it, and resets the
# reference count to 0.
sub force_disconnect_from_db {
    my ($self) = @_;

    if ($_dbh_instance) {
        # No explicit finish() on statement handles here:
        # "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."

        print STDERR "    GSSQL disconnecting from database\n";

        # Disconnecting is done through the singleton db handle itself
        $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
        undef $_dbh_instance;
    }

    # With the connection dead (the last gssql object finished(), or the
    # disconnect was forced), no gssql object shares a live connection any more
    $ref_count = 0;
}
271
272
273# Load the designated database, i.e. 'use <dbname>;'.
274# If the database doesn't yet exist, creates it and loads it.
275# (Don't create the collection's tables yet, though)
276# At the end it will have loaded the requested database (in MySQL: "use <db>;") on success.
277# As usual, returns success or failure value that can be evaluated in a boolean context.
# Makes $db_name (after sanitizing it) the current database: "use <db>".
# If that fails with error code 1049 (unknown database), the database is
# created and then loaded.
# Returns a true value on success and a false value (undef or 0) on failure.
sub use_db {
    my ($self, $db_name) = @_;

    my $dbh = $self->{'db_handle'};
    $db_name = $self->sanitize_name($db_name);

    print STDERR "Attempting to use database $db_name\n" if($self->{'verbosity'});

    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
    # do() returns undef on error.
    my $loaded = $dbh->do("use $db_name");

    if ($loaded) {
        # database existed and loaded successfully
        print STDERR "@@@ DATABASE $db_name EXISTED\n" if($self->{'verbosity'} > 2);
        return $loaded;
    }

    # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet.
    # Any other failure is handed back to the caller as is.
    return $loaded unless $dbh->err == 1049;

    print STDERR "Database $db_name didn't exist, creating it along with the tables for the current collection...\n" if($self->{'verbosity'});

    # attempt to create the db
    return 0 unless $self->create_db($db_name);

    print STDERR "   Created database $db_name\n" if($self->{'verbosity'} > 1);

    # once more attempt to use db, now that it exists
    #$dbh->do("use $db_name") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";
    return 0 unless $dbh->do("use $db_name");

    return 1;
}
314
315
316# We should already have done "use <database>;" if this gets called.
317# Just load this collection's metatable
# Creates this collection's metadata table unless it already exists.
# Returns 1 if the table exists or was created, 0 if creating it failed.
sub ensure_meta_table_exists {
    my ($self) = @_;

    my $meta_table = $self->get_metadata_table_name();

    if ($self->table_exists($meta_table)) {
        print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2);
        return 1;
    }

    #print STDERR "   Creating metadata table $meta_table\n" if($self->{'verbosity'} > 1);
    return $self->create_metadata_table() ? 1 : 0;
}
330
# We should already have done "use <database>;" if this gets called.
# Just make sure this collection's fulltxt table exists, creating it if necessary
# Creates this collection's fulltxt table unless it already exists.
# Returns 1 if the table exists or was created, 0 if creating it failed.
sub ensure_fulltxt_table_exists {
    my ($self) = @_;

    my $text_table = $self->get_fulltext_table_name();

    if ($self->table_exists($text_table)) {
        print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2);
        return 1;
    }

    #print STDERR "   Creating fulltxt table $text_table\n" if($self->{'verbosity'} > 1);
    return $self->create_fulltext_table() ? 1 : 0;
}
345
346
# Issues "create database <name>" for the sanitized form of $db_name.
# Returns whatever do() returns: undef on failure, see https://metacpan.org/pod/DBI#do
sub create_db {
    my ($self, $db_name) = @_;

    my $safe_name = $self->sanitize_name($db_name);

    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
    return $self->{'db_handle'}->do("create database $safe_name");
}
356
357
# Creates this collection's metadata table, with an auto incremented id as
# primary key and the columns did, sid, metaname and metavalue.
# Returns the result of do(), which is undef on failure.
sub create_metadata_table {
    my ($self) = @_;

    my $meta_table = $self->get_metadata_table_name();
    print STDERR "   Creating table $meta_table\n" if($self->{'verbosity'} > 1);

    # Using an auto incremented primary key
    my $sql = qq{CREATE TABLE $meta_table (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));};

    return $self->{'db_handle'}->do($sql);
}
369
370# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
371# 12.9.1 Natural Language Full-Text Searches
372# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
373# or let user edit this file, or add it as another option
# Creates this collection's fulltxt table, with an auto incremented id as
# primary key and the columns did, sid and fulltxt.
# Returns the result of do(), which is undef on failure.
sub create_fulltext_table {
    my ($self) = @_;

    my $text_table = $self->get_fulltext_table_name();
    print STDERR "   Creating table $text_table\n" if($self->{'verbosity'} > 1);

    # Using an auto incremented primary key
    my $sql = qq{CREATE TABLE $text_table (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));};

    return $self->{'db_handle'}->do($sql);
}
386
387
# Drops this collection's metadata table and then its fulltxt table,
# each only if it exists.
# A failing "drop table" is not checked here (the commented out warnings
# were redundant with the message DBI prints itself).
sub delete_collection_tables {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $meta_table = $self->get_metadata_table_name();
    $dbh->do("drop table $meta_table") if $self->table_exists($meta_table); # || warn("@@@ Couldn't delete $meta_table");

    my $text_table = $self->get_fulltext_table_name();
    $dbh->do("drop table $text_table") if $self->table_exists($text_table); # || warn("@@@ Couldn't delete $text_table");
}
402
403# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
404# This method is just here for debugging (for testing creating a database when there is none)
405#
406# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
407# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
408# https://dev.mysql.com/doc/en/drop-database.html
# Debugging aid only: drops the entire database named by the sanitized
# form of $db_name.
# Returns 1 if the "drop database" statement succeeded, 0 otherwise.
sub _delete_database {
    my ($self, $db_name) = @_;

    my $dbh = $self->{'db_handle'};
    $db_name = $self->sanitize_name($db_name);

    print STDERR "!!! Deleting database $db_name\n" if($self->{'verbosity'});

    # "drop database dbname"
    return $dbh->do("drop database $db_name") ? 1 : 0;
}
422
423
424########################### DB STATEMENTS ###########################
425
426# USEFUL: https://metacpan.org/pod/DBI
427# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
428
429# More efficient to use prepare() to prepare an SQL statement once and then execute() it many times
430# (binding different values to placeholders) than running do() which will prepare each time and
431# execute each time. Also, do() is not useful with SQL select statements as it doesn't fetch rows.
432# Can prepare and cache prepared statements or retrieve prepared statements if cached in one step:
433# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#prepare_cached
434
435# https://www.guru99.com/insert-into.html
436# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
437#     for inserting multiple rows at once
438# https://www.perlmonks.org/bare/?node_id=316183
439# https://metacpan.org/pod/DBI#do
440# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
441# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
442
443# https://metacpan.org/pod/DBI#performance
444# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
445#
446# This method uses lazy loading to prepare the SQL insert stmt once for a table and store it,
447# then execute the (stored) statement each time it's needed for that table.
# Inserts one (did, sid, metaname, metavalue) row into this collection's
# metadata table, using a cached prepared statement with placeholders.
#
# Parameters:
#   $doc_oid, $section_name, $meta_name, $escaped_meta_value - the row's values
#   $debug_only - if true, the statement is only printed to STDERR, not executed
# If the statement cannot be prepared, nothing is inserted and the method
# returns without a value. No other return value is defined.
sub insert_row_into_metadata_table {
    my $self = shift (@_);
    my ($doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only) = @_;

    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_metadata_table_name();
    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});# || warn("Could not prepare insert statement for metadata table\n");

    # The connection has RaiseError off, so a failed prepare hands back undef.
    # Stop here rather than die calling a method on an undefined statement
    # handle; DBI has already printed the error, since PrintError is on.
    return unless $sth;

    # Now we're ready to execute the command, unless we're only debugging

    if($debug_only) {
        # just print the statement we were going to execute
        print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n";
    }
    else {
        print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n" if $self->{'verbosity'} > 2;

        $sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
            || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
        # Execution failure will print out info anyway: since db connection sets PrintError
    }
}
471
472# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it,
473# then execute the (stored) statement each time it's needed for that table.
# Inserts one (did, sid, fulltxt) row into this collection's fulltxt table,
# using a cached prepared statement with placeholders.
#
# Parameters:
#   $doc_oid, $section_name - the row's did and sid values
#   $section_textref        - reference to the text stored in the fulltxt column
#   $debug_only - if true, the statement is only printed to STDERR, not executed
# If the statement cannot be prepared, nothing is inserted and the method
# returns without a value. No other return value is defined.
sub insert_row_into_fulltxt_table {
    my $self = shift (@_);
    #my ($did, $sid, $fulltext) = @_;
    my ($doc_oid, $section_name, $section_textref, $debug_only) = @_;

    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_fulltext_table_name();
    my $sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});# || warn("Could not prepare insert statement for fulltxt table\n");

    # The connection has RaiseError off, so a failed prepare hands back undef.
    # Stop here rather than die calling a method on an undefined statement
    # handle; DBI has already printed the error, since PrintError is on.
    return unless $sth;

    # Now we're ready to execute the command, unless we're only debugging

    # don't display the fulltxt value as it could be too long
    my $txt_repr = $$section_textref ? "<TXT>" : "NULL";
    if($debug_only) { # only print statement, don't execute it
        print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n";
    }
    else {
        print STDERR $sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n" if $self->{'verbosity'} > 2;

        $sth->execute($doc_oid, $section_name, $$section_textref)
            || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name"); # Execution failure will print out info anyway: since db connection sets PrintError
    }
}
498
499
500## The 2 select statements used by GreenstoneSQLPlugin
501
502# Using fetchall_arrayref on statement handle, to run on prepared and executed stmt
503#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
504# instead of selectall_arrayref on database handle which will prepare, execute and fetch
505#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#selectall_arrayref
506#
# Returns a reference to an array of all the rows, as fetched by fetchall_arrayref(), that match
# a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement.
# The statement is prepared and executed by this method itself.
510# Have to use prepare() and execute() instead of do() since do() does
511# not allow for fetching result set thereafter:
512# do(): "This method  is typically most useful for non-SELECT statements that either cannot be prepared in advance (due to a limitation of the driver) or do not need to be executed repeatedly. It should not be used for SELECT statements because it does not return a statement handle (so you can't fetch any data)." https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#do
# Fetches all rows of this collection's metadata table whose did equals $oid.
#
# Parameters:
#   $oid       - the document id to match on
#   $outhandle - optional filehandle the statement is printed to when
#                verbosity > 2; STDERR is used when none is passed in
# Returns: the array reference produced by fetchall_arrayref(),
# or undef if the statement could not be prepared.
sub select_from_metatable_matching_docid {
    my $self= shift (@_);
    my ($oid, $outhandle) = @_;

    # printing to an undefined filehandle is a fatal runtime error
    $outhandle = \*STDERR unless defined $outhandle;

    my $dbh = $self->{'db_handle'};
    my $tablename = $self->get_metadata_table_name();

    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
    # RaiseError is off, so a failed prepare yields undef: don't go on to
    # call methods on it. DBI has already printed the error (PrintError is on).
    return undef unless $sth;

    $sth->execute( $oid ); # will print msg on fail

    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
        if ($self->{'verbosity'} > 2);

    my $rows_ref = $sth->fetchall_arrayref();
    # "If an error occurs, fetchall_arrayref returns the data fetched thus far, which may be none.
    # You should check $sth->err afterwards (or use the RaiseError attribute) to discover if the
    # data is complete or was truncated due to an error."
    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
    # https://www.oreilly.com/library/view/programming-the-perl/1565926994/ch04s05.html
    warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
    return $rows_ref;
}
535
536
537# See select_from_metatable_matching_docid() above.
# Returns a reference to an array of all the rows, as fetched by fetchall_arrayref(), that match
# a "SELECT * FROM <COLL>_fulltxt WHERE did = $oid" SQL statement.
# The statement is prepared and executed by this method itself.
# Fetches all rows of this collection's fulltxt table whose did equals $oid.
#
# Parameters:
#   $oid       - the document id to match on
#   $outhandle - optional filehandle the statement is printed to when
#                verbosity > 2; STDERR is used when none is passed in
# Returns: the array reference produced by fetchall_arrayref(),
# or undef if the statement could not be prepared.
sub select_from_texttable_matching_docid {
    my $self= shift (@_);
    my ($oid, $outhandle) = @_;

    # printing to an undefined filehandle is a fatal runtime error
    $outhandle = \*STDERR unless defined $outhandle;

    my $dbh = $self->{'db_handle'};
    my $tablename = $self->get_fulltext_table_name();

    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
    # RaiseError is off, so a failed prepare yields undef: don't go on to
    # call methods on it. DBI has already printed the error (PrintError is on).
    return undef unless $sth;

    $sth->execute( $oid ); # will print msg on fail

    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
        if ($self->{'verbosity'} > 2);

    my $rows_ref = $sth->fetchall_arrayref();
    # Need explicit warning: fetchall_arrayref hands back whatever was fetched
    # before an error occurred, so the error has to be checked for afterwards
    warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
    return $rows_ref;

}
560
561# delete all records in metatable with specified docid
562# https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
563# DELETE FROM table_name [WHERE Clause]
564# see example under 'do' at https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
# Deletes all rows of this collection's metadata table whose did equals $oid,
# using a cached prepared statement with a placeholder.
# If the statement cannot be prepared, nothing is deleted and the method
# returns without a value. No other return value is defined.
sub delete_recs_from_metatable_with_docid {
    my $self= shift (@_);
    my ($oid) = @_;

    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_metadata_table_name();
    my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
    # RaiseError is off, so a failed prepare yields undef: don't go on to
    # call execute on it. DBI has already printed the error (PrintError is on).
    return unless $sth;

    $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
}
575
# delete all records in the fulltxt table with specified docid
# Deletes all rows of this collection's fulltxt table whose did equals $oid,
# using a cached prepared statement with a placeholder.
# If the statement cannot be prepared, nothing is deleted and the method
# returns without a value. No other return value is defined.
sub delete_recs_from_texttable_with_docid {
    my $self= shift (@_);
    my ($oid) = @_;

    my $dbh = $self->{'db_handle'};

    my $tablename = $self->get_fulltext_table_name();
    my $sth = $dbh->prepare_cached(qq{DELETE FROM $tablename WHERE did = ?});
    # RaiseError is off, so a failed prepare yields undef: don't go on to
    # call execute on it. DBI has already printed the error (PrintError is on).
    return unless $sth;

    $sth->execute( $oid ) or warn $dbh->errstr; # dbh set to print errors even without doing warn()
}
587
588# Can call this after connection succeeded to get the database handle, dbh,
589# if any specific DB operation (SQL statement, create/delete)
590# needs to be executed that is not already provided as a method of this class.
# Accessor: hands back whatever is stored in this object's 'db_handle' member.
sub get_db_handle {
    my ($self) = @_;
    return $self->{'db_handle'};
}
595
596################ HELPER METHODS ##############
597
598# More basic helper methods
# Name of this collection's metadata table: the 'tablename_prefix' member
# followed by "_metadata".
sub get_metadata_table_name {
    my ($self) = @_;
    return $self->{'tablename_prefix'} . "_metadata";
}
604
605# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
606# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
# Name of this collection's full text table: the 'tablename_prefix' member
# followed by "_fulltxt" (not "_fulltext", as FULLTEXT is a reserved keyword).
sub get_fulltext_table_name {
    my ($self) = @_;
    return $self->{'tablename_prefix'} . "_fulltxt";
}
612
613# Attempt to make sure the name parameter (for db or table name) is acceptable syntax
614# for the db in question, e.g. for mysql. For example, (My)SQL doesn't like tables or
615# databases with '-' (hyphens) in their names
# Returns a copy of $name in which every hyphen is replaced by an underscore.
# No other character is changed.
sub sanitize_name {
    my ($self, $name) = @_;

    (my $clean_name = $name) =~ s{-}{_}g;
    return $clean_name;
}
622
623
624# I can get my version of table_exists to work, but it's not so ideal
625# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
626# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
627#    The page further has a table_exists function that could work with proper comparison
628# TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
# Tells whether a table called exactly $table_name is among the tables
# reported by the database handle's tables() method.
#
# The name is compared literally and as a whole against the last component
# of each reported name, which may be qualified and quoted, e.g.
# `db`.`table` . An unanchored regex match would also report "coll_metadata"
# as existing when only "mycoll_metadata" does, and would treat characters
# of the name as regex syntax.
#
# Returns 1 if the table is found, 0 otherwise (also for an undefined or
# empty $table_name).
sub table_exists {
    my $self = shift (@_);
    my $dbh = $self->{'db_handle'};
    my ($table_name) = @_;

    return 0 unless defined $table_name && length $table_name;

    my @table_list = $dbh->tables;
    foreach my $table (@table_list) {
        # the name has to start at the beginning, or right after a qualifier
        # dot or an opening quote, and run to the end (or the closing quote)
        return 1 if ($table =~ m/(?:\A|[.`"])\Q$table_name\E[`"]?\z/);
    }
    return 0;
}
641
6421;
Note: See TracBrowser for help on using the browser.