root/main/trunk/greenstone2/perllib/gssql.pm @ 32585

Revision 32585, 34.5 KB (checked in by ak19, 10 months ago)

Super call in destructor. More important if subclassing.

Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use DBI; # the central package for this module used by GreenstoneSQL Plugout and Plugin
34
35
36##############################
37
38# TODO: add infrastructure for db_port, AutoCommit etc
39# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
40
41# + TODO: remove unnecessary warn() since PrintError is active
42# https://perldoc.perl.org/perlobj.html#Destructors
43
44# TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds
45# instead of always first checking for existence ourselves? Only when subclassing to specific
46# mysql class.
47##############################
48
49# singleton connection
50my $_dbh_instance = undef; # calls undef() function. See https://perlmaven.com/undef-and-defined-in-perl
51my $ref_count = 0;
52
53# Need params_map keys:
54# - collection_name
55# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
56# we set the enc during connect at server level. Not sure whether other DB's support it at the
57# same levels.
58
59# For connection to MySQL, need:
60#  - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
61# So these will be parameterised, but in a hashmap, for just the connect method.
62
63# Parameterise (one or more methods may use them):
64# - db_name (which is the GS3 sitename, or "greenstone2" for GS2)
65
66
67
# Install a handler for the signals a user (or the build scripts) can send to
# cancel a run, so that an open transaction gets rolled back before we die.
# SIGKILL is intentionally not registered: the operating system never hands it
# to the process, so a handler for it could never run.
$SIG{INT}  = \&finish_signal_handler;
$SIG{TERM} = \&finish_signal_handler;

# Signal handler for INT and TERM.
# If the singleton db connection is still open and AutoCommit is off, the
# uncommitted work is treated as cancelled and rolled back. The handler then
# always die()s, which gives object destructors (sub DESTROY) the chance to run.
#
# Param: $sig - name of the signal that was caught (Perl passes it to the handler).
# Never returns normally.
sub finish_signal_handler {
    my ($sig) = @_; # one of INT|TERM

    # The singleton handle is only set while a connection is open
    if ($_dbh_instance) {

        # AutoCommit off means we are inside one long transaction that has not
        # been committed yet: cancelling therefore means rolling it back
        if (!$_dbh_instance->{AutoCommit}) {
            print STDERR "   User cancelled: rolling back SQL database transaction.\n";
            $_dbh_instance->rollback(); # will warn on failure, nothing more we can/want to do
        }
    }

    die "Caught a $sig signal $!"; # die() will always call destructor (sub DESTROY)
}
89
# Constructor.
# Param: $params_map - hash reference; the keys read here are
#   'collection_name' (also used to derive the table name prefix),
#   'verbosity' (any false value, including 0, falls back to 1) and
#   'db_encoding' (any false value falls back to "utf8").
# Returns the new blessed object. No database connection is made here.
sub new
{
    my $class = shift(@_);

    my ($params_map) = @_;
    $params_map ||= {}; # so the lookups below never have to dereference undef

    # library_url: to be specified on the cmdline if not using a GS-included web server
    # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3

    # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
    # Making a shallow copy works, and can handle unknown params:
    #my $self = $params_map;

    # but being explicit for class params needed for MySQL:
    my $self = {
        'collection_name' => $params_map->{'collection_name'},
        'verbosity' => $params_map->{'verbosity'} || 1
    };

    # The db_encoding option is presently not passed in to this constructor as parameter.
    # Placed here to indicate it's sort of optional.
    # Since docxml are all in utf8, the contents of the GS SQL database should be too,
    # So making utf8 the hidden default at present.
    $self->{'db_encoding'} = $params_map->{'db_encoding'} || "utf8";

    $self = bless($self, $class);

    # Needs the blessed object, as sanitize_name() is called as a method
    $self->{'tablename_prefix'} = $self->sanitize_name($params_map->{'collection_name'});

    return $self;
}
121
122# On die(), an object's destructor is called.
123# See https://www.perl.com/article/37/2013/8/18/Catch-and-Handle-Signals-in-Perl/
124# We want to ensure we've closed the db connection in such cases.
125# "It’s common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die."
126#
127# Useful: https://perldoc.perl.org/perlobj.html#Destructors
128#
129# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect
# "Disconnects the database from the database handle. disconnect is typically only used before exiting the program. The handle is of little use after disconnecting.
131#
132# The transaction behaviour of the disconnect method is, sadly, undefined. Some database systems (such as Oracle and Ingres) will automatically commit any outstanding changes, but others (such as Informix) will rollback any outstanding changes. Applications not using AutoCommit should explicitly call commit or rollback before calling disconnect.
133#
134# The database is automatically disconnected by the DESTROY method if still connected when there are no longer any references to the handle. The DESTROY method for each driver should implicitly call rollback to undo any uncommitted changes. This is vital behaviour to ensure that incomplete transactions don't get committed simply because Perl calls DESTROY on every object before exiting. Also, do not rely on the order of object destruction during "global destruction", as it is undefined.
135#
136# Generally, if you want your changes to be committed or rolled back when you disconnect, then you should explicitly call "commit" or "rollback" before disconnecting.
137#
138# If you disconnect from a database while you still have active statement handles (e.g., SELECT statement handles that may have more data to fetch), you will get a warning. The warning may indicate that a fetch loop terminated early, perhaps due to an uncaught error. To avoid the warning call the finish method on the active handles."
139#
# Destructor.
# During Perl's global destruction phase this disconnects the singleton db
# handle if it is still open and resets the shared reference count. Outside
# that phase the connection is left alone, since other gssql objects may still
# be sharing it. In both cases a parent class destructor is called if one exists.
sub DESTROY {
    my $self = shift;

    # A destructor can run at any time, including while a die() is unwinding.
    # Localize the global status variables so the DBI calls below cannot
    # overwrite the caller's error message or the process exit status.
    local($., $@, $!, $^E, $?);

    if (${^GLOBAL_PHASE} eq 'DESTRUCT' && $_dbh_instance) {
        # database handle still active. Use singleton handle!

        # Rollback on cancel is done in finish_signal_handler() and the commit
        # on normal completion in finished(), so all that is left to do here,
        # for both natural and premature termination, is to disconnect.
        print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n";
        $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
        $_dbh_instance = undef;
        $ref_count = 0;
    }

    # "Always include a call to $self->SUPER::DESTROY in our destructors (even if we don't yet have any base/parent classes). (p. 145)"
    # Superclass and destroy, call to SUPER: https://www.perlmonks.org/?node_id=879920
    # also covers multiple-inheritance (MI)
    # Done in every phase, so a parent's cleanup is not skipped at program exit.
    $self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
}
175
176#################################
177
178# Database access related functions
179# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
180# https://www.guru99.com/insert-into.html
181
182# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
183# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
184# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
185# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
186# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
187# Unless they do a full rebuild, which will recreate the table from scratch?
188# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
189# But then should set AutoCommit to off on connection, and remember to commit every time
190
191#################
192# Database functions that use the perl DBI module (with the DBD driver module for mysql)
193#################
194
195################### BASIC DB OPERATIONS ##################
196
197# THE NEW DB FUNCTIONS
198# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
199# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
200
201# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
202
203
204
205# SINGLETON / GET INSTANCE PATTERN
206# https://stackoverflow.com/questions/16655603/perl-objects-class-variable-initialization
207# https://stackoverflow.com/questions/7587157/how-can-i-set-a-static-variable-that-can-be-accessed-by-all-subclasses-of-the-sa
208# Singleton without Moose: https://www.perl.com/article/52/2013/12/11/Implementing-the-singleton-pattern-in-Perl/
209
# Obtains the shared (singleton) database connection for this object.
# Param: $params_map - hash reference with the connection settings. This
#   object's 'db_encoding' and 'verbosity' are written into it before it is
#   handed on to _get_connection_instance().
# Stores whatever _get_connection_instance() returned in $self->{'db_handle'}.
# Returns the handle on success, having counted one more user of the shared
# connection, or undef on failure.
sub connect_to_db
{
    my ($self, $params_map) = @_;

    # The connection code needs to know this object's encoding and verbosity
    for my $setting ('db_encoding', 'verbosity') {
        $params_map->{$setting} = $self->{$setting};
    }

    # getting singleton (class method)
    my $handle = &_get_connection_instance($params_map);
    $self->{'db_handle'} = $handle;

    return undef unless $handle;

    # one more gssql object now refers to the single db connection
    $ref_count++;
    return $handle;
}
225
226# SINGLETON METHOD #
227# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout?
# Returns the singleton database handle, connecting first if no connection
# has been stored yet.
# Param: $params_map - hash reference. Keys read here:
#   'verbosity'      - when true, progress messages are printed to STDERR
#   'autocommit'     - DBI AutoCommit setting; defaults to 1 (on) if undefined
#   'db_driver'      - defaults to "mysql"
#   'db_client_user' - defaults to "root"
#   'db_client_pwd'  - passed on as is, may be undef
#   'db_host'        - defaults to "127.0.0.1"
#   'db_port'        - only added to the connect string when set
# Returns the database handle, or 0 if the connection attempt failed.
# Once a connection exists it is returned as is: the parameters (including
# 'autocommit') of later calls do not alter it.
sub _get_connection_instance
{
    #my $self= shift (@_); # singleton method doesn't use self, but callers don't need to know that
    my ($params_map) = @_;

    # DBI AutoCommit connection param is on/1 by default, so if a value for this is not defined
    # as a method parameter to _get_connection_instance, then fallback to the default of on/1.
    # Worked out up front so the status message below reports the value actually used.
    my $autocommit = (defined $params_map->{'autocommit'}) ? $params_map->{'autocommit'} : 1;

    if($params_map->{'verbosity'}) {
        if(!defined $params_map->{'autocommit'}) {
            print STDERR "  Autocommit parameter not defined\n";
        }
        # Cancelling (rollback) is only possible when AutoCommit is off
        if($autocommit) {
            print STDERR "   SQL DB CANCEL SUPPORT OFF.\n";
        } else {
            print STDERR "   SQL DB CANCEL SUPPORT ON.\n";
        }
    }

    return $_dbh_instance if($_dbh_instance);

    # or make the connection

    # these are the params for connecting to MySQL
    my $db_driver = $params_map->{'db_driver'} || "mysql";
    my $db_user = $params_map->{'db_client_user'} || "root";
    my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef and password was necessary,
                                                 # we'll see a sensible error message when connect fails
    my $db_host = $params_map->{'db_host'} || "127.0.0.1";
        # localhost doesn't work for us, but 127.0.0.1 works
        # https://metacpan.org/pod/DBD::mysql
        # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
        # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
        # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
    my $db_port = $params_map->{'db_port'}; # leave as undef if unspecified,
                 # as our tests never used port anyway (must have internally
                 # defaulted to whatever default port is used for MySQL)

    #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
    # But don't provide db now - this allows checking the db exists later when loading the db
    my $connect_str = "dbi:$db_driver:host=$db_host";
    $connect_str .= ";port=$db_port" if $db_port;

    if($params_map->{'verbosity'}) {
        print STDERR "Away to make connection to $db_driver database with:\n";
        print STDERR " - hostname $db_host; username: $db_user";
        print STDERR "; and the provided password" if $db_pwd;
        print STDERR "\nAssuming the mysql server has been started with: --character_set_server=utf8mb4\n" if $db_driver eq "mysql";
    }

    my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
                           {
                               ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
                               PrintError => 1, # on by default, but being explicit
                               RaiseError => 0, # off by default, but being explicit
                               AutoCommit => $autocommit,
                               mysql_enable_utf8mb4 => 1 # tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
                           });

    if(!$dbh) {
        # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps)
        return 0;
    }

    # On the encoding, see https://metacpan.org/pod/DBD::mysql
    # https://dev.mysql.com/doc/refman/5.7/en/charset.html
    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
    # Setting the encoding to 'utf8' turned out insufficient, as that only supports utf8 chars
    # that need up to 3 bytes. We may need up to 4 bytes per utf8 character, and for that
    # the encoding needs to be utf8mb4. This requires configuration on the server side too:
    # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc
    # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug
    # The MySQL server needs to be run with that char encoding, e.g.
    # mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/Scratch/ak19/mysql/data --character_set_server=utf8mb4
    # AND when connecting to the server, we can either set mysql_enable_utf8mb4 => 1
    # as a connection option (as done above),
    # OR do both "set NAMES utf8mb4" AND "$dbh->{mysql_enable_utf8mb4} = 1;" after connecting.
    #
    # "SET NAMES '<enc>'" appears to be mysql specific too, so using the mysql specific
    # connection option is no more objectionable and saves the extra statements.
    # The statements below may be preferred if more db_drivers are to be supported in future:
    # a separate set_db_encoding($enc) method could then issue whatever suits the db_driver
    # in use, see https://www.perlmonks.org/?node_id=259456:

    # For proper utf8 support in MySQL, encoding should be 'utf8mb4' as 'utf8' is insufficient
    #my $db_enc = ($params_map->{'db_encoding'} eq "utf8") ? "utf8mb4" : $params_map->{'db_encoding'};
    #my $stmt = "set NAMES '" . $db_enc . "'";
    #$dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication
    #$dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data

    # if we're here, then connection succeeded, store handle
    $_dbh_instance = $dbh;
    return $_dbh_instance;
}
333
334# Will disconnect if this instance of gssql holds the last reference to the db connection
335# If disconnecting and autocommit is off, then this will commit before disconnecting
# Releases this object's reference to the shared db connection.
# When the last reference is released, the transaction is committed if
# AutoCommit is off, and the connection is then closed.
# Returns 1 when there was nothing to commit, otherwise the result of the
# commit (false if the commit failed).
sub finished {
    my $self= shift (@_);
    my $dbh = $self->{'db_handle'};

    my $rc = 1; # return code: everything went fine, regardless of whether we needed to commit
                # (AutoCommit on or off)

    # Nothing to release if this object never obtained a handle, or if the
    # connection has already been closed. Decrementing regardless would push
    # the shared count below 0, after which the commit and disconnect below
    # would never be reached again.
    return $rc unless ($dbh && $ref_count > 0);

    $ref_count--;
    if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before

        # If AutoCommit was off, meaning transactions were on/enabled,
        # then here is where we commit our one long transaction.
        # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit
        if(!$dbh->{AutoCommit}) {
            print STDERR "   Committing transaction to SQL database now.\n" if $self->{'verbosity'};
            $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem
            # worth embellishing error message
        }
        # else if autocommit was on, then we'd have committed after every db operation, so nothing to do

        $self->_force_disconnect_from_db();
    }

    return $rc;
}
361
362
363# Call this method on die(), so that you're sure the perl process has disconnected from SQL db
364# Disconnect from db - https://metacpan.org/pod/DBI#disconnect
365# TODO: make sure to have committed or rolled back before disconnect
366# and that you've call finish() on statement handles if any fetch remnants remain
# Closes the singleton db connection, if there is one, and resets the count
# of gssql objects sharing it. Does no commit or rollback of its own.
sub _force_disconnect_from_db {
    my ($self) = @_;

    if ($_dbh_instance) {
        # Statement handles are not finished explicitly here. From the DBI docs:
        # "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."

        if ($self->{'verbosity'}) {
            print STDERR "    GSSQL disconnecting from database\n";
        }

        # Disconnecting always goes through the singleton handle
        $_dbh_instance->disconnect or warn $_dbh_instance->errstr;
        $_dbh_instance = undef;
    }

    # The connection is dead now, either because the last gssql object called
    # finished() or because it was forcibly closed, so nothing shares it any more
    $ref_count = 0;
}
383
384
385# Load the designated database, i.e. 'use <dbname>;'.
386# If the database doesn't yet exist, creates it and loads it.
387# (Don't create the collection's tables yet, though)
388# At the end it will have loaded the requested database (in MySQL: "use <db>;") on success.
389# As usual, returns success or failure value that can be evaluated in a boolean context.
# Selects the named database ("use <db>"), creating it first if the server
# reports that it doesn't exist.
# Param: $db_name - database name; passed through sanitize_name() before use.
# Returns a true value on success and a false value on failure.
sub use_db {
    my ($self, $db_name) = @_;
    my $dbh = $self->{'db_handle'};
    $db_name = $self->sanitize_name($db_name);

    print STDERR "Attempting to use database $db_name\n" if($self->{'verbosity'});

    # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
    # do() returns undef on error.
    my $loaded = $dbh->do("use $db_name");

    if($loaded) {
        # database existed and loaded successfully
        print STDERR "@@@ DATABASE $db_name EXISTED\n" if($self->{'verbosity'} > 2);
        return $loaded;
    }

    # Any failure other than "Unknown database" (code 1049, mysql only?) is passed back as is
    return $loaded unless($dbh->err == 1049);

    print STDERR "Database $db_name didn't exist, creating it along with the tables for the current collection...\n" if($self->{'verbosity'});

    # attempt to create the db
    $self->create_db($db_name) || return 0;

    print STDERR "   Created database $db_name\n" if($self->{'verbosity'} > 1);

    # once more attempt to use db, now that it exists
    $dbh->do("use $db_name") || return 0;

    return 1;
}
426
427
428# We should already have done "use <database>;" if this gets called.
429# Just load this collection's metatable
# Creates this collection's metadata table unless table_exists() reports it
# is already there.
# Returns 1 if the table exists or was created, 0 if creating it failed.
sub ensure_meta_table_exists {
    my ($self) = @_;

    my $meta_table = $self->get_metadata_table_name();

    if($self->table_exists($meta_table)) {
        print STDERR "@@@ Meta table exists\n" if($self->{'verbosity'} > 2);
        return 1;
    }

    return $self->create_metadata_table() ? 1 : 0;
}
441
442# We should already have done "use <database>;" if this gets called.
# Just make sure this collection's fulltxt table exists, creating it if necessary
# Creates this collection's fulltext table unless table_exists() reports it
# is already there.
# Returns 1 if the table exists or was created, 0 if creating it failed.
sub ensure_fulltxt_table_exists {
    my ($self) = @_;

    my $text_table = $self->get_fulltext_table_name();

    if($self->table_exists($text_table)) {
        print STDERR "@@@ Fulltxt table exists\n" if($self->{'verbosity'} > 2);
        return 1;
    }

    return $self->create_fulltext_table() ? 1 : 0;
}
455
456
# Issues "create database <name>" on this object's db handle.
# Param: $db_name - database name; passed through sanitize_name() before use.
# Returns the result of do(), which is undef on failure
# (https://metacpan.org/pod/DBI#do).
sub create_db {
    my ($self, $db_name) = @_;

    my $safe_name = $self->sanitize_name($db_name);

    # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
    return $self->{'db_handle'}->do("create database $safe_name");
}
466
467
# Creates this collection's metadata table, with an auto incremented primary
# key followed by the did, sid, metaname and metavalue columns.
# Returns the result of do() on the CREATE TABLE statement.
sub create_metadata_table {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $table_name = $self->get_metadata_table_name();
    if($self->{'verbosity'} > 1) {
        print STDERR "   Creating table $table_name\n";
    }

    # Column definitions, using an auto incremented primary key
    my @columns = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "metaname VARCHAR(127) NOT NULL",
        "metavalue VARCHAR(1023) NOT NULL",
        "PRIMARY KEY(id)"
    );
    my $create_sql = "CREATE TABLE $table_name (" . join(", ", @columns) . ");";

    return $dbh->do($create_sql);
}
479
480# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
481# 12.9.1 Natural Language Full-Text Searches
482# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
483# or let user edit this file, or add it as another option
# Creates this collection's fulltext table, with an auto incremented primary
# key followed by the did and sid columns and a LONGTEXT fulltxt column.
# Returns the result of do() on the CREATE TABLE statement.
sub create_fulltext_table {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    my $table_name = $self->get_fulltext_table_name();
    if($self->{'verbosity'} > 1) {
        print STDERR "   Creating table $table_name\n";
    }

    # Column definitions, using an auto incremented primary key
    my @columns = (
        "id INT NOT NULL AUTO_INCREMENT",
        "did VARCHAR(63) NOT NULL",
        "sid VARCHAR(63) NOT NULL",
        "fulltxt LONGTEXT",
        "PRIMARY KEY(id)"
    );
    my $create_sql = "CREATE TABLE $table_name (" . join(", ", @columns) . ");";

    return $dbh->do($create_sql);
}
496
497
# Drops this collection's metadata table and fulltext table, each only if
# table_exists() reports it is present.
# No return value is set explicitly; a failing drop is not reported here.
sub delete_collection_tables {
    my ($self) = @_;
    my $dbh = $self->{'db_handle'};

    # drop table <tablename>
    my $meta_table = $self->get_metadata_table_name();
    $dbh->do("drop table $meta_table") if($self->table_exists($meta_table));

    my $text_table = $self->get_fulltext_table_name();
    $dbh->do("drop table $text_table") if($self->table_exists($text_table));

    # TODO Q: commit here, so that future select statements work?
    # See https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#Transactions
}
515
516# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
517# This method is just here for debugging (for testing creating a database when there is none)
518#
519# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
520# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
521# https://dev.mysql.com/doc/en/drop-database.html
# Drops the named database. Debugging aid only.
# Param: $db_name - database name; passed through sanitize_name() before use.
# Returns 1 if the drop statement succeeded, 0 otherwise.
sub _delete_database {
    my ($self, $db_name) = @_;
    my $dbh = $self->{'db_handle'};

    my $safe_name = $self->sanitize_name($db_name);

    if($self->{'verbosity'}) {
        print STDERR "!!! Deleting database $safe_name\n";
    }

    # "drop database dbname"
    return $dbh->do("drop database $safe_name") ? 1 : 0;
}
535
536
537########################### DB STATEMENTS ###########################
538
539# USEFUL: https://metacpan.org/pod/DBI
540# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
541
542# More efficient to use prepare() to prepare an SQL statement once and then execute() it many times
543# (binding different values to placeholders) than running do() which will prepare each time and
544# execute each time. Also, do() is not useful with SQL select statements as it doesn't fetch rows.
545# Can prepare and cache prepared statements or retrieve prepared statements if cached in one step:
546# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#prepare_cached
547
548# https://www.guru99.com/insert-into.html
549# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
550#     for inserting multiple rows at once
551# https://www.perlmonks.org/bare/?node_id=316183
552# https://metacpan.org/pod/DBI#do
553# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
554# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
555
556# https://metacpan.org/pod/DBI#performance
557# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
558#
559# This method uses lazy loading to prepare the SQL insert stmt once for a table and store it,
560# then execute the (stored) statement each time it's needed for that table.
# Inserts one (did, sid, metaname, metavalue) row into this collection's
# metadata table. The insert statement is obtained through prepare_cached().
# Params:
#   $doc_oid, $section_name, $meta_name, $escaped_meta_value - values bound, in
#       that order, to the statement's four placeholders
#   $debug_only - when true, the statement and values are only printed to
#       STDERR and nothing is executed
sub insert_row_into_metadata_table {
    my ($self, $doc_oid, $section_name, $meta_name, $escaped_meta_value, $debug_only) = @_;

    my $dbh = $self->{'db_handle'};
    my $tablename = $self->get_metadata_table_name();

    my $insert_sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)});# || warn("Could not prepare insert statement for metadata table\n");

    if(!$debug_only) {
        # Really execute the statement, tracing it first at high verbosity
        print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n" if $self->{'verbosity'} > 2;

        # Execution failure will print out info anyway: since db connection sets PrintError
        $insert_sth->execute($doc_oid, $section_name, $meta_name, $escaped_meta_value)
            || warn ("Unable to write metadata row to db:\n\tOID $doc_oid, section $section_name,\n\tmeta name: $meta_name, val: $escaped_meta_value");
    }
    else {
        # Debugging: just print the statement we would have executed
        print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $meta_name, $escaped_meta_value)\n";
    }
}
584
585# As above. Likewise uses lazy loading to prepare the SQL insert stmt once for a table and store it,
586# then execute the (stored) statement each time it's needed for that table.
# Inserts one (did, sid, fulltxt) row into this collection's fulltext table.
# The insert statement is obtained through prepare_cached().
# Params:
#   $doc_oid, $section_name - values bound to the first two placeholders
#   $section_textref - reference to the section's text; the referenced text is
#       bound to the third placeholder
#   $debug_only - when true, the statement is only printed to STDERR and
#       nothing is executed
sub insert_row_into_fulltxt_table {
    my ($self, $doc_oid, $section_name, $section_textref, $debug_only) = @_;

    my $dbh = $self->{'db_handle'};
    my $tablename = $self->get_fulltext_table_name();

    my $insert_sth = $dbh->prepare_cached(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)});# || warn("Could not prepare insert statement for fulltxt table\n");

    # The text itself is never displayed as it could be too long: trace
    # messages only show whether there is any
    my $txt_repr = $$section_textref ? "<TXT>" : "NULL";

    if(!$debug_only) {
        # Really execute the statement, tracing it first at high verbosity
        print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n" if $self->{'verbosity'} > 2;

        # Execution failure will print out info anyway: since db connection sets PrintError
        $insert_sth->execute($doc_oid, $section_name, $$section_textref)
            || warn ("Unable to write fulltxt row to db for row:\n\tOID $doc_oid, section $section_name");
    }
    else {
        # Debugging: only print statement, don't execute it
        print STDERR $insert_sth->{'Statement'} . "($doc_oid, $section_name, $txt_repr)\n";
    }
}
611
612
613## The 2 select statements used by GreenstoneSQLPlugin
614
615# Using fetchall_arrayref on statement handle, to run on prepared and executed stmt
616#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
617# instead of selectall_arrayref on database handle which will prepare, execute and fetch
618#   https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#selectall_arrayref
619#
# Prepares and executes a "SELECT * FROM <COLL>_metadata WHERE did = $oid" SQL statement
# and returns what fetchall_arrayref() produced for it: a reference to an array of the
# fetched rows. The statement handle itself is not returned to the caller.
623# Have to use prepare() and execute() instead of do() since do() does
624# not allow for fetching result set thereafter:
625# do(): "This method  is typically most useful for non-SELECT statements that either cannot be prepared in advance (due to a limitation of the driver) or do not need to be executed repeatedly. It should not be used for SELECT statements because it does not return a statement handle (so you can't fetch any data)." https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#do
626sub select_from_metatable_matching_docid {
627    my $self= shift (@_);
628    my ($oid, $outhandle) = @_;
629   
630    my $dbh = $self->{'db_handle'};
631    my $tablename = $self->get_metadata_table_name();
632   
633    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
634    $sth->execute( $oid ); # will print msg on fail
635
636    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
637    if ($self->{'verbosity'} > 2);
638   
639    my $rows_ref = $sth->fetchall_arrayref();
640    # "If an error occurs, fetchall_arrayref returns the data fetched thus far, which may be none.
641    # You should check $sth->err afterwards (or use the RaiseError attribute) to discover if the
642    # data is complete or was truncated due to an error."
643    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#fetchall_arrayref
644    # https://www.oreilly.com/library/view/programming-the-perl/1565926994/ch04s05.html
645    warn("Data fetching from $tablename terminated early by error: " . $dbh->err) if $dbh->err;
646    return $rows_ref;
647}
648
649
# Counterpart of select_from_metatable_matching_docid() for the fulltxt table.
# Runs "SELECT * FROM <COLL>_fulltxt WHERE did = ?" with $oid bound to the placeholder
# and returns the fetched rows (NOT the statement handle).
#
# Params:  $oid       - document id matched against the did column
#          $outhandle - filehandle the SQL statement is echoed to when verbosity > 2
# Returns: whatever fetchall_arrayref() returned. Per the DBI documentation this is a
#          reference to an array with one array reference per fetched row, and it may
#          be incomplete if an error interrupted the fetch (a warning is issued then).
sub select_from_texttable_matching_docid {
    my $self= shift (@_);
    my ($oid, $outhandle) = @_;

    my $dbh = $self->{'db_handle'};
    my $tablename = $self->get_fulltext_table_name();

    my $sth = $dbh->prepare_cached(qq{SELECT * FROM $tablename WHERE did = ?});
    $sth->execute( $oid ); # will print msg on fail

    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n"
        if ($self->{'verbosity'} > 2);

    my $rows_ref = $sth->fetchall_arrayref();
    # Need explicit warning: fetchall_arrayref returns whatever was fetched before an
    # error occurred, and the DBI documentation says to check $sth->err afterwards
    # to find out whether the data is complete.
    my $fetch_err = $sth->err;
    warn("Data fetching from $tablename terminated early by error: " . $fetch_err) if $fetch_err;
    return $rows_ref;
}
673
# Removes every row of the metadata table whose did column equals $oid.
# SQL form: DELETE FROM table_name [WHERE Clause]
#   https://www.tutorialspoint.com/mysql/mysql-delete-query.htm
# See also the example under 'do' at https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm
#
# Returns the result of execute() when that is true, otherwise the result of warn().
sub delete_recs_from_metatable_with_docid {
    my ($self, $oid) = @_;

    my $dbh = $self->{'db_handle'};
    my $meta_table = $self->get_metadata_table_name();
    my $delete_sth = $dbh->prepare_cached(qq{DELETE FROM $meta_table WHERE did = ?});

    # The db handle is set up to print errors by itself; the warn() is a further notice.
    return $delete_sth->execute($oid) || warn($dbh->errstr);
}
688
# Removes every row of the fulltxt table whose did column equals $oid.
#
# Returns the result of execute() when that is true, otherwise the result of warn().
sub delete_recs_from_texttable_with_docid {
    my ($self, $oid) = @_;

    my $dbh = $self->{'db_handle'};
    my $fulltxt_table = $self->get_fulltext_table_name();
    my $delete_sth = $dbh->prepare_cached(qq{DELETE FROM $fulltxt_table WHERE did = ?});

    # The db handle is set up to print errors by itself; the warn() is a further notice.
    return $delete_sth->execute($oid) || warn($dbh->errstr);
}
700
# Accessor for the database handle (dbh) stored on this object.
# Intended for use once the connection has succeeded, when some specific DB operation
# (SQL statement, create/delete) is needed that this class does not already offer
# as a method.
sub get_db_handle {
    my ($self) = @_;
    return $self->{'db_handle'};
}
708
709################ HELPER METHODS ##############
710
711# More basic helper methods
# Returns the name of the metadata table: the object's tablename_prefix
# followed by "_metadata".
sub get_metadata_table_name {
    my ($self) = @_;
    return $self->{'tablename_prefix'} . "_metadata";
}
717
# Returns the name of the full text table: the object's tablename_prefix
# followed by "_fulltxt".
# The suffix is "fulltxt" rather than "fulltext" because FULLTEXT is a reserved keyword
# in (My)SQL, https://dev.mysql.com/doc/refman/5.5/en/keywords.html
# so neither a table nor any of its columns can be named "fulltext".
sub get_fulltext_table_name {
    my ($self) = @_;
    return $self->{'tablename_prefix'} . "_fulltxt";
}
725
# Tries to turn $name (a db or table name) into syntax the db in question accepts,
# e.g. for mysql. At present that means replacing every '-' (hyphen) with '_',
# since (My)SQL doesn't like hyphens in table or database names.
# Returns the adjusted copy of the name.
sub sanitize_name {
    my ($self, $name) = @_;
    $name =~ tr/-/_/;   # character-for-character swap of hyphens for underscores
    return $name;
}
735
736
# Reports whether a table called $table_name is among the tables listed by the
# database handle. Returns 1 if so, 0 otherwise (including for an undefined or
# empty $table_name).
#
# Interesting that MySQL has non-standard commands CREATE TABLE IF NOT EXISTS and
# DROP TABLE IF EXISTS, see https://www.perlmonks.org/bare/?node=DBI%20Recipes
# That page also has a table_exists function.
# TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
#
# The comparison is an exact string match on the table name, not a pattern match:
# a pattern match would treat "demo_metadata" as present whenever e.g.
# "mydemo_metadata" exists, and would interpret regex metacharacters in the name.
sub table_exists {
    my $self = shift (@_);
    my ($table_name) = @_;

    # Nothing can match an empty name. (Interpolating an empty string into m//
    # would instead silently reuse the last successfully matched pattern.)
    return 0 unless (defined $table_name && length($table_name));

    my $dbh = $self->{'db_handle'};
    foreach my $listed_name ($dbh->tables) {
        # The caller may have passed the name exactly as the driver lists it
        return 1 if ($listed_name eq $table_name);

        # DBI documents that listed names may carry a catalog/schema prefix and
        # identifier quoting, e.g. `db`.`table`: reduce to the bare table name.
        # NOTE: assumes the table name itself contains no '.' character.
        my $bare_name = $listed_name;
        $bare_name =~ s/^.*\.//;      # drop any leading "schema." qualifier(s)
        $bare_name =~ s/^[`"\[]//;    # drop an opening identifier quote
        $bare_name =~ s/[`"\]]$//;    # drop the closing identifier quote
        return 1 if ($bare_name eq $table_name);
    }
    return 0;
}
754
7551;
Note: See TracBrowser for help on using the browser.